[BOLT] Fix profile and tests for nop-removal pass

Summary:
Since nops are now removed in a separate pass, the profile is consumed
on a CFG with nops. If previously a profile was generated without nops,
the offsets in the profile could be different if branches included nops
either as a source or a destination.

This diff adjust offsets to make the profile reading backwards
compatible.

(cherry picked from FBD33231254)
This commit is contained in:
Maksim Panchenko 2021-12-18 17:05:00 -08:00
parent 08f56926c2
commit ccb99dd126
8 changed files with 86 additions and 22 deletions

View File

@ -1091,6 +1091,9 @@ public:
uint64_t
computeInstructionSize(const MCInst &Inst,
const MCCodeEmitter *Emitter = nullptr) const {
if (auto Size = MIB->getAnnotationWithDefault<uint32_t>(Inst, "Size"))
return Size;
if (!Emitter)
Emitter = this->MCE.get();
SmallString<256> Code;

View File

@ -105,7 +105,6 @@ public:
/// Detect and eliminate unreachable basic blocks. We could have those
/// filled with nops and they are used for alignment.
class EliminateUnreachableBlocks : public BinaryFunctionPass {
std::shared_timed_mutex ModifiedMtx;
std::unordered_set<const BinaryFunction *> Modified;
std::atomic<unsigned> DeletedBlocks{0};
std::atomic<uint64_t> DeletedBytes{0};

View File

@ -1980,6 +1980,7 @@ bool BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
BinaryBasicBlock *InsertBB = nullptr;
BinaryBasicBlock *PrevBB = nullptr;
bool IsLastInstrNop = false;
// Offset of the last non-nop instruction.
uint64_t LastInstrOffset = 0;
auto addCFIPlaceholders = [this](uint64_t CFIOffset,
@ -1992,13 +1993,22 @@ bool BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
};
// For profiling purposes we need to save the offset of the last instruction
// in the basic block. But in certain cases we don't if the instruction was
// the last one, and we have to go back and update its offset.
// in the basic block.
// NOTE: nops always have an Offset annotation. Annotate the last non-nop as
// older profiles ignored nops.
auto updateOffset = [&](uint64_t Offset) {
assert(PrevBB && PrevBB != InsertBB && "invalid previous block");
MCInst *PrevInstr = PrevBB->getLastNonPseudoInstr();
if (PrevInstr && !MIB->hasAnnotation(*PrevInstr, "Offset"))
MIB->addAnnotation(*PrevInstr, "Offset", static_cast<uint32_t>(Offset),
MCInst *LastNonNop = nullptr;
for (BinaryBasicBlock::reverse_iterator RII = PrevBB->getLastNonPseudo(),
E = PrevBB->rend();
RII != E; ++RII) {
if (!BC.MIB->isPseudo(*RII) && !BC.MIB->isNoop(*RII)) {
LastNonNop = &*RII;
break;
}
}
if (LastNonNop && !MIB->hasAnnotation(*LastNonNop, "Offset"))
MIB->addAnnotation(*LastNonNop, "Offset", static_cast<uint32_t>(Offset),
AllocatorId);
};
@ -2009,7 +2019,7 @@ bool BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
auto LI = Labels.find(Offset);
if (LI != Labels.end()) {
// Always create new BB at branch destination.
PrevBB = InsertBB;
PrevBB = InsertBB ? InsertBB : PrevBB;
InsertBB = addBasicBlock(LI->first, LI->second,
opts::PreserveBlocksAlignment && IsLastInstrNop);
if (PrevBB)
@ -2020,14 +2030,16 @@ bool BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
bool IsSDTMarker =
MIB->isNoop(Instr) && BC.SDTMarkers.count(InstrInputAddr);
bool IsLKMarker = BC.LKMarkers.count(InstrInputAddr);
if (IsSDTMarker || IsLKMarker) {
HasSDTMarker = true;
LLVM_DEBUG(dbgs() << "SDTMarker or LKMarker detected in the input at : "
<< utohexstr(InstrInputAddr) << "\n");
if (!MIB->hasAnnotation(Instr, "Offset")) {
// Mark all nops with Offset for profile tracking purposes.
if (MIB->isNoop(Instr) || IsLKMarker) {
if (!MIB->hasAnnotation(Instr, "Offset"))
MIB->addAnnotation(Instr, "Offset", static_cast<uint32_t>(Offset),
AllocatorId);
}
if (IsSDTMarker || IsLKMarker)
HasSDTMarker = true;
else
// Annotate ordinary nops, so we can safely delete them if required.
MIB->addAnnotation(Instr, "NOP", static_cast<uint32_t>(1), AllocatorId);
}
if (!InsertBB) {
@ -2060,7 +2072,8 @@ bool BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
const bool IsBlockEnd = MIB->isTerminator(Instr);
IsLastInstrNop = MIB->isNoop(Instr);
LastInstrOffset = Offset;
if (!IsLastInstrNop)
LastInstrOffset = Offset;
InsertBB->addInstruction(std::move(Instr));
// Add associated CFI instrs. We always add the CFI instruction that is
@ -4361,6 +4374,13 @@ MCInst *BinaryFunction::getInstructionAtOffset(uint64_t Offset) {
return &Inst;
}
if (MCInst *LastInstr = BB->getLastNonPseudoInstr()) {
const uint32_t Size =
BC.MIB->getAnnotationWithDefault<uint32_t>(*LastInstr, "Size");
if (BB->getEndOffset() - Offset == Size)
return LastInstr;
}
return nullptr;
} else {
llvm_unreachable("invalid CFG state to use getInstructionAtOffset()");

View File

@ -100,6 +100,28 @@ void BinaryFunction::postProcessProfile() {
}
}
// Fix for old profiles.
for (BinaryBasicBlock *BB : BasicBlocks) {
if (BB->size() != 1 || BB->succ_size() != 1)
continue;
if (BB->getKnownExecutionCount() == 0)
continue;
MCInst *Instr = BB->getFirstNonPseudoInstr();
assert(Instr && "expected non-pseudo instr");
if (!BC.MIB->hasAnnotation(*Instr, "NOP"))
continue;
BinaryBasicBlock *FTSuccessor = BB->getSuccessor();
BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*FTSuccessor);
if (!BI.Count) {
BI.Count = BB->getKnownExecutionCount();
FTSuccessor->setExecutionCount(FTSuccessor->getKnownExecutionCount() +
BI.Count);
}
}
if (opts::FixBlockCounts) {
for (BinaryBasicBlock *BB : BasicBlocks) {
// Make sure that execution count of a block is at least the branch count

View File

@ -720,10 +720,9 @@ bool DataReader::recordBranch(BinaryFunction &BF, uint64_t From, uint64_t To,
return true;
}
if (FromBB->succ_size() == 0) {
// Return from a tail call.
// Return from a tail call.
if (FromBB->succ_size() == 0)
return true;
}
// Very rarely we will see ignored branches. Do a linear check.
for (std::pair<uint32_t, uint32_t> &Branch : BF.IgnoredBranches) {
@ -817,10 +816,21 @@ bool DataReader::recordBranch(BinaryFunction &BF, uint64_t From, uint64_t To,
if (collectedInBoltedBinary() && FromBB == ToBB)
return true;
LLVM_DEBUG(dbgs() << "invalid branch in " << BF << '\n'
<< Twine::utohexstr(From) << " -> "
<< Twine::utohexstr(To) << '\n');
return false;
BinaryBasicBlock *FTSuccessor = FromBB->getConditionalSuccessor(false);
if (FTSuccessor && FTSuccessor->succ_size() == 1 &&
FTSuccessor->getSuccessor(ToBB->getLabel())) {
BinaryBasicBlock::BinaryBranchInfo &FTBI =
FTSuccessor->getBranchInfo(*ToBB);
FTBI.Count += Count;
if (Count)
FTBI.MispredictedCount += Mispreds;
ToBB = FTSuccessor;
} else {
LLVM_DEBUG(dbgs() << "invalid branch in " << BF << '\n'
<< Twine::utohexstr(From) << " -> "
<< Twine::utohexstr(To) << '\n');
return false;
}
}
BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(*ToBB);

View File

@ -147,7 +147,7 @@ PrintICP("print-icp",
cl::Hidden,
cl::cat(BoltOptCategory));
static cl::opt<bool>
cl::opt<bool>
PrintNormalized("print-normalized",
cl::desc("print functions after CFG is normalized"),
cl::ZeroOrMore,

View File

@ -43,6 +43,7 @@ extern cl::opt<bool> NeverPrint;
extern cl::opt<std::string> OutputFilename;
extern cl::opt<bool> PrintAfterBranchFixup;
extern cl::opt<bool> PrintFinalized;
extern cl::opt<bool> PrintNormalized;
extern cl::opt<bool> PrintReordered;
extern cl::opt<bool> PrintSections;
extern cl::opt<bool> PrintDisasm;
@ -352,6 +353,13 @@ void MachORewriteInstance::runOptimizationPasses() {
Manager.registerPass(std::make_unique<PatchEntries>());
Manager.registerPass(std::make_unique<Instrumentation>(opts::NeverPrint));
}
Manager.registerPass(std::make_unique<ShortenInstructions>(opts::NeverPrint));
Manager.registerPass(std::make_unique<RemoveNops>(opts::NeverPrint));
Manager.registerPass(std::make_unique<NormalizeCFG>(opts::PrintNormalized));
Manager.registerPass(
std::make_unique<ReorderBasicBlocks>(opts::PrintReordered));
Manager.registerPass(

View File

@ -2816,6 +2816,8 @@ void RewriteInstance::buildFunctionsCFG() {
// Create annotation indices to allow lock-free execution
BC->MIB->getOrCreateAnnotationIndex("Offset");
BC->MIB->getOrCreateAnnotationIndex("JTIndexReg");
BC->MIB->getOrCreateAnnotationIndex("NOP");
BC->MIB->getOrCreateAnnotationIndex("Size");
ParallelUtilities::WorkFuncWithAllocTy WorkFun =
[&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId) {