forked from OSchip/llvm-project
[BOLT] Fix profile and tests for nop-removal pass
Summary: Since nops are now removed in a separate pass, the profile is consumed on a CFG with nops. If previously a profile was generated without nops, the offsets in the profile could be different if branches included nops either as a source or a destination. This diff adjusts offsets to make profile reading backward-compatible. (cherry picked from FBD33231254)
This commit is contained in:
parent
08f56926c2
commit
ccb99dd126
|
@ -1091,6 +1091,9 @@ public:
|
||||||
uint64_t
|
uint64_t
|
||||||
computeInstructionSize(const MCInst &Inst,
|
computeInstructionSize(const MCInst &Inst,
|
||||||
const MCCodeEmitter *Emitter = nullptr) const {
|
const MCCodeEmitter *Emitter = nullptr) const {
|
||||||
|
if (auto Size = MIB->getAnnotationWithDefault<uint32_t>(Inst, "Size"))
|
||||||
|
return Size;
|
||||||
|
|
||||||
if (!Emitter)
|
if (!Emitter)
|
||||||
Emitter = this->MCE.get();
|
Emitter = this->MCE.get();
|
||||||
SmallString<256> Code;
|
SmallString<256> Code;
|
||||||
|
|
|
@ -105,7 +105,6 @@ public:
|
||||||
/// Detect and eliminate unreachable basic blocks. We could have those
|
/// Detect and eliminate unreachable basic blocks. We could have those
|
||||||
/// filled with nops and they are used for alignment.
|
/// filled with nops and they are used for alignment.
|
||||||
class EliminateUnreachableBlocks : public BinaryFunctionPass {
|
class EliminateUnreachableBlocks : public BinaryFunctionPass {
|
||||||
std::shared_timed_mutex ModifiedMtx;
|
|
||||||
std::unordered_set<const BinaryFunction *> Modified;
|
std::unordered_set<const BinaryFunction *> Modified;
|
||||||
std::atomic<unsigned> DeletedBlocks{0};
|
std::atomic<unsigned> DeletedBlocks{0};
|
||||||
std::atomic<uint64_t> DeletedBytes{0};
|
std::atomic<uint64_t> DeletedBytes{0};
|
||||||
|
|
|
@ -1980,6 +1980,7 @@ bool BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
|
||||||
BinaryBasicBlock *InsertBB = nullptr;
|
BinaryBasicBlock *InsertBB = nullptr;
|
||||||
BinaryBasicBlock *PrevBB = nullptr;
|
BinaryBasicBlock *PrevBB = nullptr;
|
||||||
bool IsLastInstrNop = false;
|
bool IsLastInstrNop = false;
|
||||||
|
// Offset of the last non-nop instruction.
|
||||||
uint64_t LastInstrOffset = 0;
|
uint64_t LastInstrOffset = 0;
|
||||||
|
|
||||||
auto addCFIPlaceholders = [this](uint64_t CFIOffset,
|
auto addCFIPlaceholders = [this](uint64_t CFIOffset,
|
||||||
|
@ -1992,13 +1993,22 @@ bool BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
|
||||||
};
|
};
|
||||||
|
|
||||||
// For profiling purposes we need to save the offset of the last instruction
|
// For profiling purposes we need to save the offset of the last instruction
|
||||||
// in the basic block. But in certain cases we don't if the instruction was
|
// in the basic block.
|
||||||
// the last one, and we have to go back and update its offset.
|
// NOTE: nops always have an Offset annotation. Annotate the last non-nop as
|
||||||
|
// older profiles ignored nops.
|
||||||
auto updateOffset = [&](uint64_t Offset) {
|
auto updateOffset = [&](uint64_t Offset) {
|
||||||
assert(PrevBB && PrevBB != InsertBB && "invalid previous block");
|
assert(PrevBB && PrevBB != InsertBB && "invalid previous block");
|
||||||
MCInst *PrevInstr = PrevBB->getLastNonPseudoInstr();
|
MCInst *LastNonNop = nullptr;
|
||||||
if (PrevInstr && !MIB->hasAnnotation(*PrevInstr, "Offset"))
|
for (BinaryBasicBlock::reverse_iterator RII = PrevBB->getLastNonPseudo(),
|
||||||
MIB->addAnnotation(*PrevInstr, "Offset", static_cast<uint32_t>(Offset),
|
E = PrevBB->rend();
|
||||||
|
RII != E; ++RII) {
|
||||||
|
if (!BC.MIB->isPseudo(*RII) && !BC.MIB->isNoop(*RII)) {
|
||||||
|
LastNonNop = &*RII;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (LastNonNop && !MIB->hasAnnotation(*LastNonNop, "Offset"))
|
||||||
|
MIB->addAnnotation(*LastNonNop, "Offset", static_cast<uint32_t>(Offset),
|
||||||
AllocatorId);
|
AllocatorId);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -2009,7 +2019,7 @@ bool BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
|
||||||
auto LI = Labels.find(Offset);
|
auto LI = Labels.find(Offset);
|
||||||
if (LI != Labels.end()) {
|
if (LI != Labels.end()) {
|
||||||
// Always create new BB at branch destination.
|
// Always create new BB at branch destination.
|
||||||
PrevBB = InsertBB;
|
PrevBB = InsertBB ? InsertBB : PrevBB;
|
||||||
InsertBB = addBasicBlock(LI->first, LI->second,
|
InsertBB = addBasicBlock(LI->first, LI->second,
|
||||||
opts::PreserveBlocksAlignment && IsLastInstrNop);
|
opts::PreserveBlocksAlignment && IsLastInstrNop);
|
||||||
if (PrevBB)
|
if (PrevBB)
|
||||||
|
@ -2020,14 +2030,16 @@ bool BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
|
||||||
bool IsSDTMarker =
|
bool IsSDTMarker =
|
||||||
MIB->isNoop(Instr) && BC.SDTMarkers.count(InstrInputAddr);
|
MIB->isNoop(Instr) && BC.SDTMarkers.count(InstrInputAddr);
|
||||||
bool IsLKMarker = BC.LKMarkers.count(InstrInputAddr);
|
bool IsLKMarker = BC.LKMarkers.count(InstrInputAddr);
|
||||||
if (IsSDTMarker || IsLKMarker) {
|
// Mark all nops with Offset for profile tracking purposes.
|
||||||
HasSDTMarker = true;
|
if (MIB->isNoop(Instr) || IsLKMarker) {
|
||||||
LLVM_DEBUG(dbgs() << "SDTMarker or LKMarker detected in the input at : "
|
if (!MIB->hasAnnotation(Instr, "Offset"))
|
||||||
<< utohexstr(InstrInputAddr) << "\n");
|
|
||||||
if (!MIB->hasAnnotation(Instr, "Offset")) {
|
|
||||||
MIB->addAnnotation(Instr, "Offset", static_cast<uint32_t>(Offset),
|
MIB->addAnnotation(Instr, "Offset", static_cast<uint32_t>(Offset),
|
||||||
AllocatorId);
|
AllocatorId);
|
||||||
}
|
if (IsSDTMarker || IsLKMarker)
|
||||||
|
HasSDTMarker = true;
|
||||||
|
else
|
||||||
|
// Annotate ordinary nops, so we can safely delete them if required.
|
||||||
|
MIB->addAnnotation(Instr, "NOP", static_cast<uint32_t>(1), AllocatorId);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!InsertBB) {
|
if (!InsertBB) {
|
||||||
|
@ -2060,7 +2072,8 @@ bool BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
|
||||||
|
|
||||||
const bool IsBlockEnd = MIB->isTerminator(Instr);
|
const bool IsBlockEnd = MIB->isTerminator(Instr);
|
||||||
IsLastInstrNop = MIB->isNoop(Instr);
|
IsLastInstrNop = MIB->isNoop(Instr);
|
||||||
LastInstrOffset = Offset;
|
if (!IsLastInstrNop)
|
||||||
|
LastInstrOffset = Offset;
|
||||||
InsertBB->addInstruction(std::move(Instr));
|
InsertBB->addInstruction(std::move(Instr));
|
||||||
|
|
||||||
// Add associated CFI instrs. We always add the CFI instruction that is
|
// Add associated CFI instrs. We always add the CFI instruction that is
|
||||||
|
@ -4361,6 +4374,13 @@ MCInst *BinaryFunction::getInstructionAtOffset(uint64_t Offset) {
|
||||||
return &Inst;
|
return &Inst;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (MCInst *LastInstr = BB->getLastNonPseudoInstr()) {
|
||||||
|
const uint32_t Size =
|
||||||
|
BC.MIB->getAnnotationWithDefault<uint32_t>(*LastInstr, "Size");
|
||||||
|
if (BB->getEndOffset() - Offset == Size)
|
||||||
|
return LastInstr;
|
||||||
|
}
|
||||||
|
|
||||||
return nullptr;
|
return nullptr;
|
||||||
} else {
|
} else {
|
||||||
llvm_unreachable("invalid CFG state to use getInstructionAtOffset()");
|
llvm_unreachable("invalid CFG state to use getInstructionAtOffset()");
|
||||||
|
|
|
@ -100,6 +100,28 @@ void BinaryFunction::postProcessProfile() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fix for old profiles.
|
||||||
|
for (BinaryBasicBlock *BB : BasicBlocks) {
|
||||||
|
if (BB->size() != 1 || BB->succ_size() != 1)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (BB->getKnownExecutionCount() == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
MCInst *Instr = BB->getFirstNonPseudoInstr();
|
||||||
|
assert(Instr && "expected non-pseudo instr");
|
||||||
|
if (!BC.MIB->hasAnnotation(*Instr, "NOP"))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
BinaryBasicBlock *FTSuccessor = BB->getSuccessor();
|
||||||
|
BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*FTSuccessor);
|
||||||
|
if (!BI.Count) {
|
||||||
|
BI.Count = BB->getKnownExecutionCount();
|
||||||
|
FTSuccessor->setExecutionCount(FTSuccessor->getKnownExecutionCount() +
|
||||||
|
BI.Count);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (opts::FixBlockCounts) {
|
if (opts::FixBlockCounts) {
|
||||||
for (BinaryBasicBlock *BB : BasicBlocks) {
|
for (BinaryBasicBlock *BB : BasicBlocks) {
|
||||||
// Make sure that execution count of a block is at least the branch count
|
// Make sure that execution count of a block is at least the branch count
|
||||||
|
|
|
@ -720,10 +720,9 @@ bool DataReader::recordBranch(BinaryFunction &BF, uint64_t From, uint64_t To,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (FromBB->succ_size() == 0) {
|
// Return from a tail call.
|
||||||
// Return from a tail call.
|
if (FromBB->succ_size() == 0)
|
||||||
return true;
|
return true;
|
||||||
}
|
|
||||||
|
|
||||||
// Very rarely we will see ignored branches. Do a linear check.
|
// Very rarely we will see ignored branches. Do a linear check.
|
||||||
for (std::pair<uint32_t, uint32_t> &Branch : BF.IgnoredBranches) {
|
for (std::pair<uint32_t, uint32_t> &Branch : BF.IgnoredBranches) {
|
||||||
|
@ -817,10 +816,21 @@ bool DataReader::recordBranch(BinaryFunction &BF, uint64_t From, uint64_t To,
|
||||||
if (collectedInBoltedBinary() && FromBB == ToBB)
|
if (collectedInBoltedBinary() && FromBB == ToBB)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
LLVM_DEBUG(dbgs() << "invalid branch in " << BF << '\n'
|
BinaryBasicBlock *FTSuccessor = FromBB->getConditionalSuccessor(false);
|
||||||
<< Twine::utohexstr(From) << " -> "
|
if (FTSuccessor && FTSuccessor->succ_size() == 1 &&
|
||||||
<< Twine::utohexstr(To) << '\n');
|
FTSuccessor->getSuccessor(ToBB->getLabel())) {
|
||||||
return false;
|
BinaryBasicBlock::BinaryBranchInfo &FTBI =
|
||||||
|
FTSuccessor->getBranchInfo(*ToBB);
|
||||||
|
FTBI.Count += Count;
|
||||||
|
if (Count)
|
||||||
|
FTBI.MispredictedCount += Mispreds;
|
||||||
|
ToBB = FTSuccessor;
|
||||||
|
} else {
|
||||||
|
LLVM_DEBUG(dbgs() << "invalid branch in " << BF << '\n'
|
||||||
|
<< Twine::utohexstr(From) << " -> "
|
||||||
|
<< Twine::utohexstr(To) << '\n');
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(*ToBB);
|
BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(*ToBB);
|
||||||
|
|
|
@ -147,7 +147,7 @@ PrintICP("print-icp",
|
||||||
cl::Hidden,
|
cl::Hidden,
|
||||||
cl::cat(BoltOptCategory));
|
cl::cat(BoltOptCategory));
|
||||||
|
|
||||||
static cl::opt<bool>
|
cl::opt<bool>
|
||||||
PrintNormalized("print-normalized",
|
PrintNormalized("print-normalized",
|
||||||
cl::desc("print functions after CFG is normalized"),
|
cl::desc("print functions after CFG is normalized"),
|
||||||
cl::ZeroOrMore,
|
cl::ZeroOrMore,
|
||||||
|
|
|
@ -43,6 +43,7 @@ extern cl::opt<bool> NeverPrint;
|
||||||
extern cl::opt<std::string> OutputFilename;
|
extern cl::opt<std::string> OutputFilename;
|
||||||
extern cl::opt<bool> PrintAfterBranchFixup;
|
extern cl::opt<bool> PrintAfterBranchFixup;
|
||||||
extern cl::opt<bool> PrintFinalized;
|
extern cl::opt<bool> PrintFinalized;
|
||||||
|
extern cl::opt<bool> PrintNormalized;
|
||||||
extern cl::opt<bool> PrintReordered;
|
extern cl::opt<bool> PrintReordered;
|
||||||
extern cl::opt<bool> PrintSections;
|
extern cl::opt<bool> PrintSections;
|
||||||
extern cl::opt<bool> PrintDisasm;
|
extern cl::opt<bool> PrintDisasm;
|
||||||
|
@ -352,6 +353,13 @@ void MachORewriteInstance::runOptimizationPasses() {
|
||||||
Manager.registerPass(std::make_unique<PatchEntries>());
|
Manager.registerPass(std::make_unique<PatchEntries>());
|
||||||
Manager.registerPass(std::make_unique<Instrumentation>(opts::NeverPrint));
|
Manager.registerPass(std::make_unique<Instrumentation>(opts::NeverPrint));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Manager.registerPass(std::make_unique<ShortenInstructions>(opts::NeverPrint));
|
||||||
|
|
||||||
|
Manager.registerPass(std::make_unique<RemoveNops>(opts::NeverPrint));
|
||||||
|
|
||||||
|
Manager.registerPass(std::make_unique<NormalizeCFG>(opts::PrintNormalized));
|
||||||
|
|
||||||
Manager.registerPass(
|
Manager.registerPass(
|
||||||
std::make_unique<ReorderBasicBlocks>(opts::PrintReordered));
|
std::make_unique<ReorderBasicBlocks>(opts::PrintReordered));
|
||||||
Manager.registerPass(
|
Manager.registerPass(
|
||||||
|
|
|
@ -2816,6 +2816,8 @@ void RewriteInstance::buildFunctionsCFG() {
|
||||||
// Create annotation indices to allow lock-free execution
|
// Create annotation indices to allow lock-free execution
|
||||||
BC->MIB->getOrCreateAnnotationIndex("Offset");
|
BC->MIB->getOrCreateAnnotationIndex("Offset");
|
||||||
BC->MIB->getOrCreateAnnotationIndex("JTIndexReg");
|
BC->MIB->getOrCreateAnnotationIndex("JTIndexReg");
|
||||||
|
BC->MIB->getOrCreateAnnotationIndex("NOP");
|
||||||
|
BC->MIB->getOrCreateAnnotationIndex("Size");
|
||||||
|
|
||||||
ParallelUtilities::WorkFuncWithAllocTy WorkFun =
|
ParallelUtilities::WorkFuncWithAllocTy WorkFun =
|
||||||
[&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId) {
|
[&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId) {
|
||||||
|
|
Loading…
Reference in New Issue