[BOLT] Fix profile and tests for nop-removal pass

Summary:
Since nops are now removed in a separate pass, the profile is consumed
on a CFG with nops. If previously a profile was generated without nops,
the offsets in the profile could be different if branches included nops
either as a source or a destination.

This diff adjust offsets to make the profile reading backwards
compatible.

(cherry picked from FBD33231254)
This commit is contained in:
Maksim Panchenko 2021-12-18 17:05:00 -08:00
parent 08f56926c2
commit ccb99dd126
8 changed files with 86 additions and 22 deletions

View File

@ -1091,6 +1091,9 @@ public:
uint64_t uint64_t
computeInstructionSize(const MCInst &Inst, computeInstructionSize(const MCInst &Inst,
const MCCodeEmitter *Emitter = nullptr) const { const MCCodeEmitter *Emitter = nullptr) const {
if (auto Size = MIB->getAnnotationWithDefault<uint32_t>(Inst, "Size"))
return Size;
if (!Emitter) if (!Emitter)
Emitter = this->MCE.get(); Emitter = this->MCE.get();
SmallString<256> Code; SmallString<256> Code;

View File

@ -105,7 +105,6 @@ public:
/// Detect and eliminate unreachable basic blocks. We could have those /// Detect and eliminate unreachable basic blocks. We could have those
/// filled with nops and they are used for alignment. /// filled with nops and they are used for alignment.
class EliminateUnreachableBlocks : public BinaryFunctionPass { class EliminateUnreachableBlocks : public BinaryFunctionPass {
std::shared_timed_mutex ModifiedMtx;
std::unordered_set<const BinaryFunction *> Modified; std::unordered_set<const BinaryFunction *> Modified;
std::atomic<unsigned> DeletedBlocks{0}; std::atomic<unsigned> DeletedBlocks{0};
std::atomic<uint64_t> DeletedBytes{0}; std::atomic<uint64_t> DeletedBytes{0};

View File

@ -1980,6 +1980,7 @@ bool BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
BinaryBasicBlock *InsertBB = nullptr; BinaryBasicBlock *InsertBB = nullptr;
BinaryBasicBlock *PrevBB = nullptr; BinaryBasicBlock *PrevBB = nullptr;
bool IsLastInstrNop = false; bool IsLastInstrNop = false;
// Offset of the last non-nop instruction.
uint64_t LastInstrOffset = 0; uint64_t LastInstrOffset = 0;
auto addCFIPlaceholders = [this](uint64_t CFIOffset, auto addCFIPlaceholders = [this](uint64_t CFIOffset,
@ -1992,13 +1993,22 @@ bool BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
}; };
// For profiling purposes we need to save the offset of the last instruction // For profiling purposes we need to save the offset of the last instruction
// in the basic block. But in certain cases we don't if the instruction was // in the basic block.
// the last one, and we have to go back and update its offset. // NOTE: nops always have an Offset annotation. Annotate the last non-nop as
// older profiles ignored nops.
auto updateOffset = [&](uint64_t Offset) { auto updateOffset = [&](uint64_t Offset) {
assert(PrevBB && PrevBB != InsertBB && "invalid previous block"); assert(PrevBB && PrevBB != InsertBB && "invalid previous block");
MCInst *PrevInstr = PrevBB->getLastNonPseudoInstr(); MCInst *LastNonNop = nullptr;
if (PrevInstr && !MIB->hasAnnotation(*PrevInstr, "Offset")) for (BinaryBasicBlock::reverse_iterator RII = PrevBB->getLastNonPseudo(),
MIB->addAnnotation(*PrevInstr, "Offset", static_cast<uint32_t>(Offset), E = PrevBB->rend();
RII != E; ++RII) {
if (!BC.MIB->isPseudo(*RII) && !BC.MIB->isNoop(*RII)) {
LastNonNop = &*RII;
break;
}
}
if (LastNonNop && !MIB->hasAnnotation(*LastNonNop, "Offset"))
MIB->addAnnotation(*LastNonNop, "Offset", static_cast<uint32_t>(Offset),
AllocatorId); AllocatorId);
}; };
@ -2009,7 +2019,7 @@ bool BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
auto LI = Labels.find(Offset); auto LI = Labels.find(Offset);
if (LI != Labels.end()) { if (LI != Labels.end()) {
// Always create new BB at branch destination. // Always create new BB at branch destination.
PrevBB = InsertBB; PrevBB = InsertBB ? InsertBB : PrevBB;
InsertBB = addBasicBlock(LI->first, LI->second, InsertBB = addBasicBlock(LI->first, LI->second,
opts::PreserveBlocksAlignment && IsLastInstrNop); opts::PreserveBlocksAlignment && IsLastInstrNop);
if (PrevBB) if (PrevBB)
@ -2020,14 +2030,16 @@ bool BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
bool IsSDTMarker = bool IsSDTMarker =
MIB->isNoop(Instr) && BC.SDTMarkers.count(InstrInputAddr); MIB->isNoop(Instr) && BC.SDTMarkers.count(InstrInputAddr);
bool IsLKMarker = BC.LKMarkers.count(InstrInputAddr); bool IsLKMarker = BC.LKMarkers.count(InstrInputAddr);
if (IsSDTMarker || IsLKMarker) { // Mark all nops with Offset for profile tracking purposes.
HasSDTMarker = true; if (MIB->isNoop(Instr) || IsLKMarker) {
LLVM_DEBUG(dbgs() << "SDTMarker or LKMarker detected in the input at : " if (!MIB->hasAnnotation(Instr, "Offset"))
<< utohexstr(InstrInputAddr) << "\n");
if (!MIB->hasAnnotation(Instr, "Offset")) {
MIB->addAnnotation(Instr, "Offset", static_cast<uint32_t>(Offset), MIB->addAnnotation(Instr, "Offset", static_cast<uint32_t>(Offset),
AllocatorId); AllocatorId);
} if (IsSDTMarker || IsLKMarker)
HasSDTMarker = true;
else
// Annotate ordinary nops, so we can safely delete them if required.
MIB->addAnnotation(Instr, "NOP", static_cast<uint32_t>(1), AllocatorId);
} }
if (!InsertBB) { if (!InsertBB) {
@ -2060,7 +2072,8 @@ bool BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
const bool IsBlockEnd = MIB->isTerminator(Instr); const bool IsBlockEnd = MIB->isTerminator(Instr);
IsLastInstrNop = MIB->isNoop(Instr); IsLastInstrNop = MIB->isNoop(Instr);
LastInstrOffset = Offset; if (!IsLastInstrNop)
LastInstrOffset = Offset;
InsertBB->addInstruction(std::move(Instr)); InsertBB->addInstruction(std::move(Instr));
// Add associated CFI instrs. We always add the CFI instruction that is // Add associated CFI instrs. We always add the CFI instruction that is
@ -4361,6 +4374,13 @@ MCInst *BinaryFunction::getInstructionAtOffset(uint64_t Offset) {
return &Inst; return &Inst;
} }
if (MCInst *LastInstr = BB->getLastNonPseudoInstr()) {
const uint32_t Size =
BC.MIB->getAnnotationWithDefault<uint32_t>(*LastInstr, "Size");
if (BB->getEndOffset() - Offset == Size)
return LastInstr;
}
return nullptr; return nullptr;
} else { } else {
llvm_unreachable("invalid CFG state to use getInstructionAtOffset()"); llvm_unreachable("invalid CFG state to use getInstructionAtOffset()");

View File

@ -100,6 +100,28 @@ void BinaryFunction::postProcessProfile() {
} }
} }
// Fix for old profiles.
for (BinaryBasicBlock *BB : BasicBlocks) {
if (BB->size() != 1 || BB->succ_size() != 1)
continue;
if (BB->getKnownExecutionCount() == 0)
continue;
MCInst *Instr = BB->getFirstNonPseudoInstr();
assert(Instr && "expected non-pseudo instr");
if (!BC.MIB->hasAnnotation(*Instr, "NOP"))
continue;
BinaryBasicBlock *FTSuccessor = BB->getSuccessor();
BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*FTSuccessor);
if (!BI.Count) {
BI.Count = BB->getKnownExecutionCount();
FTSuccessor->setExecutionCount(FTSuccessor->getKnownExecutionCount() +
BI.Count);
}
}
if (opts::FixBlockCounts) { if (opts::FixBlockCounts) {
for (BinaryBasicBlock *BB : BasicBlocks) { for (BinaryBasicBlock *BB : BasicBlocks) {
// Make sure that execution count of a block is at least the branch count // Make sure that execution count of a block is at least the branch count

View File

@ -720,10 +720,9 @@ bool DataReader::recordBranch(BinaryFunction &BF, uint64_t From, uint64_t To,
return true; return true;
} }
if (FromBB->succ_size() == 0) { // Return from a tail call.
// Return from a tail call. if (FromBB->succ_size() == 0)
return true; return true;
}
// Very rarely we will see ignored branches. Do a linear check. // Very rarely we will see ignored branches. Do a linear check.
for (std::pair<uint32_t, uint32_t> &Branch : BF.IgnoredBranches) { for (std::pair<uint32_t, uint32_t> &Branch : BF.IgnoredBranches) {
@ -817,10 +816,21 @@ bool DataReader::recordBranch(BinaryFunction &BF, uint64_t From, uint64_t To,
if (collectedInBoltedBinary() && FromBB == ToBB) if (collectedInBoltedBinary() && FromBB == ToBB)
return true; return true;
LLVM_DEBUG(dbgs() << "invalid branch in " << BF << '\n' BinaryBasicBlock *FTSuccessor = FromBB->getConditionalSuccessor(false);
<< Twine::utohexstr(From) << " -> " if (FTSuccessor && FTSuccessor->succ_size() == 1 &&
<< Twine::utohexstr(To) << '\n'); FTSuccessor->getSuccessor(ToBB->getLabel())) {
return false; BinaryBasicBlock::BinaryBranchInfo &FTBI =
FTSuccessor->getBranchInfo(*ToBB);
FTBI.Count += Count;
if (Count)
FTBI.MispredictedCount += Mispreds;
ToBB = FTSuccessor;
} else {
LLVM_DEBUG(dbgs() << "invalid branch in " << BF << '\n'
<< Twine::utohexstr(From) << " -> "
<< Twine::utohexstr(To) << '\n');
return false;
}
} }
BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(*ToBB); BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(*ToBB);

View File

@ -147,7 +147,7 @@ PrintICP("print-icp",
cl::Hidden, cl::Hidden,
cl::cat(BoltOptCategory)); cl::cat(BoltOptCategory));
static cl::opt<bool> cl::opt<bool>
PrintNormalized("print-normalized", PrintNormalized("print-normalized",
cl::desc("print functions after CFG is normalized"), cl::desc("print functions after CFG is normalized"),
cl::ZeroOrMore, cl::ZeroOrMore,

View File

@ -43,6 +43,7 @@ extern cl::opt<bool> NeverPrint;
extern cl::opt<std::string> OutputFilename; extern cl::opt<std::string> OutputFilename;
extern cl::opt<bool> PrintAfterBranchFixup; extern cl::opt<bool> PrintAfterBranchFixup;
extern cl::opt<bool> PrintFinalized; extern cl::opt<bool> PrintFinalized;
extern cl::opt<bool> PrintNormalized;
extern cl::opt<bool> PrintReordered; extern cl::opt<bool> PrintReordered;
extern cl::opt<bool> PrintSections; extern cl::opt<bool> PrintSections;
extern cl::opt<bool> PrintDisasm; extern cl::opt<bool> PrintDisasm;
@ -352,6 +353,13 @@ void MachORewriteInstance::runOptimizationPasses() {
Manager.registerPass(std::make_unique<PatchEntries>()); Manager.registerPass(std::make_unique<PatchEntries>());
Manager.registerPass(std::make_unique<Instrumentation>(opts::NeverPrint)); Manager.registerPass(std::make_unique<Instrumentation>(opts::NeverPrint));
} }
Manager.registerPass(std::make_unique<ShortenInstructions>(opts::NeverPrint));
Manager.registerPass(std::make_unique<RemoveNops>(opts::NeverPrint));
Manager.registerPass(std::make_unique<NormalizeCFG>(opts::PrintNormalized));
Manager.registerPass( Manager.registerPass(
std::make_unique<ReorderBasicBlocks>(opts::PrintReordered)); std::make_unique<ReorderBasicBlocks>(opts::PrintReordered));
Manager.registerPass( Manager.registerPass(

View File

@ -2816,6 +2816,8 @@ void RewriteInstance::buildFunctionsCFG() {
// Create annotation indices to allow lock-free execution // Create annotation indices to allow lock-free execution
BC->MIB->getOrCreateAnnotationIndex("Offset"); BC->MIB->getOrCreateAnnotationIndex("Offset");
BC->MIB->getOrCreateAnnotationIndex("JTIndexReg"); BC->MIB->getOrCreateAnnotationIndex("JTIndexReg");
BC->MIB->getOrCreateAnnotationIndex("NOP");
BC->MIB->getOrCreateAnnotationIndex("Size");
ParallelUtilities::WorkFuncWithAllocTy WorkFun = ParallelUtilities::WorkFuncWithAllocTy WorkFun =
[&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId) { [&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId) {