forked from OSchip/llvm-project
[BOLT] Fix profile and tests for nop-removal pass
Summary: Since nops are now removed in a separate pass, the profile is consumed on a CFG with nops. If previously a profile was generated without nops, the offsets in the profile could be different if branches included nops either as a source or a destination. This diff adjust offsets to make the profile reading backwards compatible. (cherry picked from FBD33231254)
This commit is contained in:
parent
08f56926c2
commit
ccb99dd126
|
@ -1091,6 +1091,9 @@ public:
|
|||
uint64_t
|
||||
computeInstructionSize(const MCInst &Inst,
|
||||
const MCCodeEmitter *Emitter = nullptr) const {
|
||||
if (auto Size = MIB->getAnnotationWithDefault<uint32_t>(Inst, "Size"))
|
||||
return Size;
|
||||
|
||||
if (!Emitter)
|
||||
Emitter = this->MCE.get();
|
||||
SmallString<256> Code;
|
||||
|
|
|
@ -105,7 +105,6 @@ public:
|
|||
/// Detect and eliminate unreachable basic blocks. We could have those
|
||||
/// filled with nops and they are used for alignment.
|
||||
class EliminateUnreachableBlocks : public BinaryFunctionPass {
|
||||
std::shared_timed_mutex ModifiedMtx;
|
||||
std::unordered_set<const BinaryFunction *> Modified;
|
||||
std::atomic<unsigned> DeletedBlocks{0};
|
||||
std::atomic<uint64_t> DeletedBytes{0};
|
||||
|
|
|
@ -1980,6 +1980,7 @@ bool BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
|
|||
BinaryBasicBlock *InsertBB = nullptr;
|
||||
BinaryBasicBlock *PrevBB = nullptr;
|
||||
bool IsLastInstrNop = false;
|
||||
// Offset of the last non-nop instruction.
|
||||
uint64_t LastInstrOffset = 0;
|
||||
|
||||
auto addCFIPlaceholders = [this](uint64_t CFIOffset,
|
||||
|
@ -1992,13 +1993,22 @@ bool BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
|
|||
};
|
||||
|
||||
// For profiling purposes we need to save the offset of the last instruction
|
||||
// in the basic block. But in certain cases we don't if the instruction was
|
||||
// the last one, and we have to go back and update its offset.
|
||||
// in the basic block.
|
||||
// NOTE: nops always have an Offset annotation. Annotate the last non-nop as
|
||||
// older profiles ignored nops.
|
||||
auto updateOffset = [&](uint64_t Offset) {
|
||||
assert(PrevBB && PrevBB != InsertBB && "invalid previous block");
|
||||
MCInst *PrevInstr = PrevBB->getLastNonPseudoInstr();
|
||||
if (PrevInstr && !MIB->hasAnnotation(*PrevInstr, "Offset"))
|
||||
MIB->addAnnotation(*PrevInstr, "Offset", static_cast<uint32_t>(Offset),
|
||||
MCInst *LastNonNop = nullptr;
|
||||
for (BinaryBasicBlock::reverse_iterator RII = PrevBB->getLastNonPseudo(),
|
||||
E = PrevBB->rend();
|
||||
RII != E; ++RII) {
|
||||
if (!BC.MIB->isPseudo(*RII) && !BC.MIB->isNoop(*RII)) {
|
||||
LastNonNop = &*RII;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (LastNonNop && !MIB->hasAnnotation(*LastNonNop, "Offset"))
|
||||
MIB->addAnnotation(*LastNonNop, "Offset", static_cast<uint32_t>(Offset),
|
||||
AllocatorId);
|
||||
};
|
||||
|
||||
|
@ -2009,7 +2019,7 @@ bool BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
|
|||
auto LI = Labels.find(Offset);
|
||||
if (LI != Labels.end()) {
|
||||
// Always create new BB at branch destination.
|
||||
PrevBB = InsertBB;
|
||||
PrevBB = InsertBB ? InsertBB : PrevBB;
|
||||
InsertBB = addBasicBlock(LI->first, LI->second,
|
||||
opts::PreserveBlocksAlignment && IsLastInstrNop);
|
||||
if (PrevBB)
|
||||
|
@ -2020,14 +2030,16 @@ bool BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
|
|||
bool IsSDTMarker =
|
||||
MIB->isNoop(Instr) && BC.SDTMarkers.count(InstrInputAddr);
|
||||
bool IsLKMarker = BC.LKMarkers.count(InstrInputAddr);
|
||||
if (IsSDTMarker || IsLKMarker) {
|
||||
HasSDTMarker = true;
|
||||
LLVM_DEBUG(dbgs() << "SDTMarker or LKMarker detected in the input at : "
|
||||
<< utohexstr(InstrInputAddr) << "\n");
|
||||
if (!MIB->hasAnnotation(Instr, "Offset")) {
|
||||
// Mark all nops with Offset for profile tracking purposes.
|
||||
if (MIB->isNoop(Instr) || IsLKMarker) {
|
||||
if (!MIB->hasAnnotation(Instr, "Offset"))
|
||||
MIB->addAnnotation(Instr, "Offset", static_cast<uint32_t>(Offset),
|
||||
AllocatorId);
|
||||
}
|
||||
if (IsSDTMarker || IsLKMarker)
|
||||
HasSDTMarker = true;
|
||||
else
|
||||
// Annotate ordinary nops, so we can safely delete them if required.
|
||||
MIB->addAnnotation(Instr, "NOP", static_cast<uint32_t>(1), AllocatorId);
|
||||
}
|
||||
|
||||
if (!InsertBB) {
|
||||
|
@ -2060,7 +2072,8 @@ bool BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
|
|||
|
||||
const bool IsBlockEnd = MIB->isTerminator(Instr);
|
||||
IsLastInstrNop = MIB->isNoop(Instr);
|
||||
LastInstrOffset = Offset;
|
||||
if (!IsLastInstrNop)
|
||||
LastInstrOffset = Offset;
|
||||
InsertBB->addInstruction(std::move(Instr));
|
||||
|
||||
// Add associated CFI instrs. We always add the CFI instruction that is
|
||||
|
@ -4361,6 +4374,13 @@ MCInst *BinaryFunction::getInstructionAtOffset(uint64_t Offset) {
|
|||
return &Inst;
|
||||
}
|
||||
|
||||
if (MCInst *LastInstr = BB->getLastNonPseudoInstr()) {
|
||||
const uint32_t Size =
|
||||
BC.MIB->getAnnotationWithDefault<uint32_t>(*LastInstr, "Size");
|
||||
if (BB->getEndOffset() - Offset == Size)
|
||||
return LastInstr;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
} else {
|
||||
llvm_unreachable("invalid CFG state to use getInstructionAtOffset()");
|
||||
|
|
|
@ -100,6 +100,28 @@ void BinaryFunction::postProcessProfile() {
|
|||
}
|
||||
}
|
||||
|
||||
// Fix for old profiles.
|
||||
for (BinaryBasicBlock *BB : BasicBlocks) {
|
||||
if (BB->size() != 1 || BB->succ_size() != 1)
|
||||
continue;
|
||||
|
||||
if (BB->getKnownExecutionCount() == 0)
|
||||
continue;
|
||||
|
||||
MCInst *Instr = BB->getFirstNonPseudoInstr();
|
||||
assert(Instr && "expected non-pseudo instr");
|
||||
if (!BC.MIB->hasAnnotation(*Instr, "NOP"))
|
||||
continue;
|
||||
|
||||
BinaryBasicBlock *FTSuccessor = BB->getSuccessor();
|
||||
BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*FTSuccessor);
|
||||
if (!BI.Count) {
|
||||
BI.Count = BB->getKnownExecutionCount();
|
||||
FTSuccessor->setExecutionCount(FTSuccessor->getKnownExecutionCount() +
|
||||
BI.Count);
|
||||
}
|
||||
}
|
||||
|
||||
if (opts::FixBlockCounts) {
|
||||
for (BinaryBasicBlock *BB : BasicBlocks) {
|
||||
// Make sure that execution count of a block is at least the branch count
|
||||
|
|
|
@ -720,10 +720,9 @@ bool DataReader::recordBranch(BinaryFunction &BF, uint64_t From, uint64_t To,
|
|||
return true;
|
||||
}
|
||||
|
||||
if (FromBB->succ_size() == 0) {
|
||||
// Return from a tail call.
|
||||
// Return from a tail call.
|
||||
if (FromBB->succ_size() == 0)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Very rarely we will see ignored branches. Do a linear check.
|
||||
for (std::pair<uint32_t, uint32_t> &Branch : BF.IgnoredBranches) {
|
||||
|
@ -817,10 +816,21 @@ bool DataReader::recordBranch(BinaryFunction &BF, uint64_t From, uint64_t To,
|
|||
if (collectedInBoltedBinary() && FromBB == ToBB)
|
||||
return true;
|
||||
|
||||
LLVM_DEBUG(dbgs() << "invalid branch in " << BF << '\n'
|
||||
<< Twine::utohexstr(From) << " -> "
|
||||
<< Twine::utohexstr(To) << '\n');
|
||||
return false;
|
||||
BinaryBasicBlock *FTSuccessor = FromBB->getConditionalSuccessor(false);
|
||||
if (FTSuccessor && FTSuccessor->succ_size() == 1 &&
|
||||
FTSuccessor->getSuccessor(ToBB->getLabel())) {
|
||||
BinaryBasicBlock::BinaryBranchInfo &FTBI =
|
||||
FTSuccessor->getBranchInfo(*ToBB);
|
||||
FTBI.Count += Count;
|
||||
if (Count)
|
||||
FTBI.MispredictedCount += Mispreds;
|
||||
ToBB = FTSuccessor;
|
||||
} else {
|
||||
LLVM_DEBUG(dbgs() << "invalid branch in " << BF << '\n'
|
||||
<< Twine::utohexstr(From) << " -> "
|
||||
<< Twine::utohexstr(To) << '\n');
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(*ToBB);
|
||||
|
|
|
@ -147,7 +147,7 @@ PrintICP("print-icp",
|
|||
cl::Hidden,
|
||||
cl::cat(BoltOptCategory));
|
||||
|
||||
static cl::opt<bool>
|
||||
cl::opt<bool>
|
||||
PrintNormalized("print-normalized",
|
||||
cl::desc("print functions after CFG is normalized"),
|
||||
cl::ZeroOrMore,
|
||||
|
|
|
@ -43,6 +43,7 @@ extern cl::opt<bool> NeverPrint;
|
|||
extern cl::opt<std::string> OutputFilename;
|
||||
extern cl::opt<bool> PrintAfterBranchFixup;
|
||||
extern cl::opt<bool> PrintFinalized;
|
||||
extern cl::opt<bool> PrintNormalized;
|
||||
extern cl::opt<bool> PrintReordered;
|
||||
extern cl::opt<bool> PrintSections;
|
||||
extern cl::opt<bool> PrintDisasm;
|
||||
|
@ -352,6 +353,13 @@ void MachORewriteInstance::runOptimizationPasses() {
|
|||
Manager.registerPass(std::make_unique<PatchEntries>());
|
||||
Manager.registerPass(std::make_unique<Instrumentation>(opts::NeverPrint));
|
||||
}
|
||||
|
||||
Manager.registerPass(std::make_unique<ShortenInstructions>(opts::NeverPrint));
|
||||
|
||||
Manager.registerPass(std::make_unique<RemoveNops>(opts::NeverPrint));
|
||||
|
||||
Manager.registerPass(std::make_unique<NormalizeCFG>(opts::PrintNormalized));
|
||||
|
||||
Manager.registerPass(
|
||||
std::make_unique<ReorderBasicBlocks>(opts::PrintReordered));
|
||||
Manager.registerPass(
|
||||
|
|
|
@ -2816,6 +2816,8 @@ void RewriteInstance::buildFunctionsCFG() {
|
|||
// Create annotation indices to allow lock-free execution
|
||||
BC->MIB->getOrCreateAnnotationIndex("Offset");
|
||||
BC->MIB->getOrCreateAnnotationIndex("JTIndexReg");
|
||||
BC->MIB->getOrCreateAnnotationIndex("NOP");
|
||||
BC->MIB->getOrCreateAnnotationIndex("Size");
|
||||
|
||||
ParallelUtilities::WorkFuncWithAllocTy WorkFun =
|
||||
[&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId) {
|
||||
|
|
Loading…
Reference in New Issue