[BOLT] Improve ICP activation policy and hot jt processing

Summary:
Previously, ICP worked with a budget of N targets to convert to
direct calls. As long as the frequency of up to N of the hottest targets
surpassed a given fraction (threshold) of the total frequency, say, 90%,
then the optimization would convert a number of targets (up to N) to
direct calls. Otherwise, it would completely abort processing this call
site. The intent was to convert a given fraction of the indirect call
site frequency to use direct calls instead, but this ends up being a
"all or nothing" strategy.

In this patch we change this to operate with the same strategy seem in
LLVM's ICP, with two thresholds. The idea is that the hottest target of
an indirect call site will be compared against these two thresholds: one
checks its frequency relative to the total frequency of the original
indirect call site, and the other checks its frequency relative to the
remaining, unconverted targets (excluding the hottest targets that were
already converted to direct calls). The remaining threshold is typically
set higher than the total threshold. This allows us more control over
ICP.

I expose two pairs of knobs, one for jump tables and another for
indirect calls.

To improve the promotion of hot jump table indices when we have memory
profile, I also fix a bug that could cause us to promote extra indices
besides the hottest ones as seen in the memory profile. When we have the
memory profile, I reapply the dual threshold checks to the memory
profile which specifies exactly which indices are hot. I then update N,
the number of targets to be promoted, based on this new information, and
update frequency information.

To allow us to work with smaller profiles, I also created an option in
perf2bolt to filter out memory samples outside the statically allocated
area of the binary (heap/stack). This option is on by default.

(cherry picked from FBD15187832)
This commit is contained in:
Rafael Auler 2019-05-02 12:28:34 -07:00 committed by Maksim Panchenko
parent fee61231ef
commit f1fde44154
3 changed files with 218 additions and 86 deletions

View File

@ -54,6 +54,13 @@ IgnoreBuildID("ignore-build-id",
cl::init(false),
cl::cat(AggregatorCategory));
static cl::opt<bool>
FilterMemProfile("filter-mem-profile",
cl::desc("if processing a memory profile, filter out stack or heap accesses that "
"won't be useful for BOLT to reduce profile file size"),
cl::init(true),
cl::cat(AggregatorCategory));
static cl::opt<unsigned>
HeatmapBlock("block-size",
cl::desc("size of a heat map block in bytes (default 64)"),
@ -1163,14 +1170,12 @@ std::error_code DataAggregator::parseBranchEvents() {
<< NumEntries << " LBR entries\n";
if (NumTotalSamples) {
if (NumSamples && NumSamplesNoLBR == NumSamples) {
if (errs().has_colors())
errs().changeColor(raw_ostream::RED);
// Note: we don't know if perf2bolt is being used to parse memory samples
// at this point. In this case, it is OK to parse zero LBRs.
errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
"LBR. Record profile with perf record -j any or run perf2bolt "
"in no-LBR mode with -nl (the performance improvement in -nl "
"mode may be limited)\n";
if (errs().has_colors())
errs().resetColor();
} else {
const auto IgnoredSamples = NumTotalSamples - NumSamples;
const auto PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples;
@ -1344,11 +1349,17 @@ void DataAggregator::processMemEvents() {
if (MemFunc) {
MemName = MemFunc->getNames()[0];
Addr -= MemFunc->getAddress();
} else if (Addr) { // TODO: filter heap/stack/nulls here?
} else if (Addr) {
if (auto *BD = BC->getBinaryDataContainingAddress(Addr)) {
MemName = BD->getName();
Addr -= BD->getAddress();
} else if (opts::FilterMemProfile) {
// Filter out heap/stack accesses
continue;
}
} else if (opts::FilterMemProfile) {
// Filter out nulls
continue;
}
const Location FuncLoc(!FuncName.empty(), FuncName, PC);

View File

@ -40,11 +40,43 @@ IndirectCallPromotion("indirect-call-promotion",
cl::cat(BoltOptCategory));
static cl::opt<unsigned>
IndirectCallPromotionThreshold(
"indirect-call-promotion-threshold",
cl::desc("threshold for optimizing a frequently taken indirect call"),
cl::init(90),
ICPJTRemainingPercentThreshold(
"icp-jt-remaining-percent-threshold",
cl::desc("The percentage threshold against remaining unpromoted indirect "
"call count for the promotion for jump tables"),
cl::init(30),
cl::ZeroOrMore,
cl::Hidden,
cl::cat(BoltOptCategory));
static cl::opt<unsigned>
ICPJTTotalPercentThreshold(
"icp-jt-total-percent-threshold",
cl::desc("The percentage threshold against total count for the promotion for "
"jump tables"),
cl::init(5),
cl::ZeroOrMore,
cl::Hidden,
cl::cat(BoltOptCategory));
static cl::opt<unsigned>
ICPCallsRemainingPercentThreshold(
"icp-calls-remaining-percent-threshold",
cl::desc("The percentage threshold against remaining unpromoted indirect "
"call count for the promotion for calls"),
cl::init(50),
cl::ZeroOrMore,
cl::Hidden,
cl::cat(BoltOptCategory));
static cl::opt<unsigned>
ICPCallsTotalPercentThreshold(
"icp-calls-total-percent-threshold",
cl::desc("The percentage threshold against total count for the promotion for "
"calls"),
cl::init(30),
cl::ZeroOrMore,
cl::Hidden,
cl::cat(BoltOptCategory));
static cl::opt<unsigned>
@ -52,7 +84,7 @@ IndirectCallPromotionMispredictThreshold(
"indirect-call-promotion-mispredict-threshold",
cl::desc("misprediction threshold for skipping ICP on an "
"indirect call"),
cl::init(2),
cl::init(0),
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
@ -69,17 +101,17 @@ IndirectCallPromotionUseMispredicts(
static cl::opt<unsigned>
IndirectCallPromotionTopN(
"indirect-call-promotion-topn",
cl::desc("number of targets to consider when doing indirect "
"call promotion"),
cl::init(1),
cl::desc("limit number of targets to consider when doing indirect "
"call promotion. 0 = no limit"),
cl::init(3),
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
static cl::opt<unsigned>
IndirectCallPromotionCallsTopN(
"indirect-call-promotion-calls-topn",
cl::desc("number of targets to consider when doing indirect "
"call promotion on calls"),
cl::desc("limit number of targets to consider when doing indirect "
"call promotion on calls. 0 = no limit"),
cl::init(0),
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
@ -87,8 +119,8 @@ IndirectCallPromotionCallsTopN(
static cl::opt<unsigned>
IndirectCallPromotionJumpTablesTopN(
"indirect-call-promotion-jump-tables-topn",
cl::desc("number of targets to consider when doing indirect "
"call promotion on jump tables"),
cl::desc("limit number of targets to consider when doing indirect "
"call promotion on jump tables. 0 = no limit"),
cl::init(0),
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
@ -106,8 +138,8 @@ static cl::opt<unsigned>
ICPTopCallsites(
"icp-top-callsites",
cl::desc("only optimize calls that contribute to this percentage of all "
"indirect calls"),
cl::init(0),
"indirect calls. 0 = all callsites"),
cl::init(99),
cl::Hidden,
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
@ -181,6 +213,42 @@ IndirectCallPromotion::Callsite::Callsite(BinaryFunction &BF,
}
}
void IndirectCallPromotion::printDecision(
llvm::raw_ostream &OS,
std::vector<IndirectCallPromotion::Callsite> &Targets, unsigned N) const {
uint64_t TotalCount = 0;
uint64_t TotalMispreds = 0;
for (const auto &S : Targets) {
TotalCount += S.Branches;
TotalMispreds += S.Mispreds;
}
if (!TotalCount)
TotalCount = 1;
if (!TotalMispreds)
TotalMispreds = 1;
OS << "BOLT-INFO: ICP decision for call site with " << Targets.size()
<< " targets, Count = " << TotalCount << ", Mispreds = " << TotalMispreds
<< "\n";
size_t I = 0;
for (const auto &S : Targets) {
OS << "Count = " << S.Branches << ", "
<< format("%.1f", (100.0 * S.Branches) / TotalCount) << ", "
<< "Mispreds = " << S.Mispreds << ", "
<< format("%.1f", (100.0 * S.Mispreds) / TotalMispreds);
if (I < N)
OS << " * to be optimized *";
if (!S.JTIndices.empty()) {
OS << " Indices:";
for (const auto Idx : S.JTIndices)
OS << " " << Idx;
}
OS << "\n";
I += S.JTIndices.empty() ? 1 : S.JTIndices.size();
}
}
// Get list of targets for a given call sorted by most frequently
// called first.
std::vector<IndirectCallPromotion::Callsite>
@ -242,7 +310,8 @@ IndirectCallPromotion::getCallTargets(
auto &A = *Result;
const auto &B = *First;
if (A.To.Sym && B.To.Sym && A.To.Sym == B.To.Sym) {
A.JTIndex.insert(A.JTIndex.end(), B.JTIndex.begin(), B.JTIndex.end());
A.JTIndices.insert(A.JTIndices.end(), B.JTIndices.begin(),
B.JTIndices.end());
} else {
*(++Result) = *First;
}
@ -491,7 +560,7 @@ IndirectCallPromotion::SymTargetsType
IndirectCallPromotion::findCallTargetSymbols(
BinaryContext &BC,
std::vector<Callsite> &Targets,
const size_t N,
size_t &N,
BinaryFunction &Function,
BinaryBasicBlock *BB,
MCInst &CallInst,
@ -511,7 +580,7 @@ IndirectCallPromotion::findCallTargetSymbols(
if (!HotTargets.empty()) {
auto findTargetsIndex = [&](uint64_t JTIndex) {
for (size_t I = 0; I < Targets.size(); ++I) {
auto &JTIs = Targets[I].JTIndex;
auto &JTIs = Targets[I].JTIndices;
if (std::find(JTIs.begin(), JTIs.end(), JTIndex) != JTIs.end())
return I;
}
@ -521,35 +590,81 @@ IndirectCallPromotion::findCallTargetSymbols(
"callsite");
};
const auto MaxHotTargets = std::min(N, HotTargets.size());
if (opts::Verbosity >= 1) {
for (size_t I = 0; I < MaxHotTargets; ++I) {
for (size_t I = 0; I < HotTargets.size(); ++I) {
outs() << "BOLT-INFO: HotTarget[" << I << "] = ("
<< HotTargets[I].first << ", " << HotTargets[I].second << ")\n";
}
}
// Recompute hottest targets, now discriminating which index is hot
// NOTE: This is a tradeoff. On one hand, we get index information. On the
// other hand, info coming from the memory profile is much less accurate
// than LBRs. So we may actually end up working with more coarse
// profile granularity in exchange for information about indices.
std::vector<Callsite> NewTargets;
for (size_t I = 0; I < MaxHotTargets; ++I) {
std::map<const MCSymbol *, uint32_t> IndicesPerTarget;
uint64_t TotalMemAccesses = 0;
for (size_t I = 0; I < HotTargets.size(); ++I) {
const auto TargetIndex = findTargetsIndex(HotTargets[I].second);
++IndicesPerTarget[Targets[TargetIndex].To.Sym];
TotalMemAccesses += HotTargets[I].first;
}
uint64_t RemainingMemAccesses = TotalMemAccesses;
const size_t TopN = opts::IndirectCallPromotionJumpTablesTopN != 0
? opts::IndirectCallPromotionTopN
: opts::IndirectCallPromotionTopN;
size_t I{0};
for (; I < HotTargets.size(); ++I) {
const auto MemAccesses = HotTargets[I].first;
if (100 * MemAccesses <
TotalMemAccesses * opts::ICPJTTotalPercentThreshold)
break;
if (100 * MemAccesses <
RemainingMemAccesses * opts::ICPJTRemainingPercentThreshold)
break;
if (TopN && I >= TopN)
break;
RemainingMemAccesses -= MemAccesses;
const auto JTIndex = HotTargets[I].second;
const auto TargetIndex = findTargetsIndex(JTIndex);
auto &Target = Targets[findTargetsIndex(JTIndex)];
NewTargets.push_back(Targets[TargetIndex]);
std::vector<uint64_t>({JTIndex}).swap(NewTargets.back().JTIndex);
NewTargets.push_back(Target);
std::vector<uint64_t>({JTIndex}).swap(NewTargets.back().JTIndices);
Target.JTIndices.erase(std::remove(Target.JTIndices.begin(),
Target.JTIndices.end(), JTIndex),
Target.JTIndices.end());
Targets.erase(Targets.begin() + TargetIndex);
// Keep fixCFG counts sane if more indices use this same target later
assert(IndicesPerTarget[Target.To.Sym] > 0 && "wrong map");
NewTargets.back().Branches =
Target.Branches / IndicesPerTarget[Target.To.Sym];
NewTargets.back().Mispreds =
Target.Mispreds / IndicesPerTarget[Target.To.Sym];
assert(Target.Branches >= NewTargets.back().Branches);
assert(Target.Mispreds >= NewTargets.back().Mispreds);
Target.Branches -= NewTargets.back().Branches;
Target.Mispreds -= NewTargets.back().Mispreds;
}
std::copy(Targets.begin(), Targets.end(), std::back_inserter(NewTargets));
assert(NewTargets.size() == Targets.size() + MaxHotTargets);
std::swap(NewTargets, Targets);
N = I;
if (N == 0 && opts::Verbosity >= 1) {
outs() << "BOLT-INFO: ICP failed in " << Function << " in "
<< BB->getName()
<< ": failed to meet thresholds after memory profile data was "
"loaded.\n";
return SymTargets;
}
}
for (size_t I = 0, TgtIdx = 0; I < N; ++TgtIdx) {
auto &Target = Targets[TgtIdx];
assert(Target.To.Sym && "All ICP targets must be to known symbols");
assert(!Target.JTIndex.empty() && "Jump tables must have indices");
for (auto Idx : Target.JTIndex) {
assert(!Target.JTIndices.empty() && "Jump tables must have indices");
for (auto Idx : Target.JTIndices) {
SymTargets.push_back(std::make_pair(Target.To.Sym, Idx));
++I;
}
@ -558,7 +673,7 @@ IndirectCallPromotion::findCallTargetSymbols(
for (size_t I = 0; I < N; ++I) {
assert(Targets[I].To.Sym &&
"All ICP targets must be to known symbols");
assert(Targets[I].JTIndex.empty() &&
assert(Targets[I].JTIndices.empty() &&
"Can't have jump table indices for non-jump tables");
SymTargets.push_back(std::make_pair(Targets[I].To.Sym, 0));
}
@ -774,10 +889,12 @@ BinaryBasicBlock *IndirectCallPromotion::fixCFG(
for (const auto &Target : Targets) {
TotalIndirectBranches += Target.Branches;
}
if (TotalIndirectBranches == 0)
TotalIndirectBranches = 1;
std::vector<BinaryBranchInfo> BBI;
std::vector<BinaryBranchInfo> ScaledBBI;
for (const auto &Target : Targets) {
const auto NumEntries = std::max(1UL, Target.JTIndex.size());
const auto NumEntries = std::max(1UL, Target.JTIndices.size());
for (size_t I = 0; I < NumEntries; ++I) {
BBI.push_back(
BinaryBranchInfo{(Target.Branches + NumEntries - 1) / NumEntries,
@ -796,7 +913,7 @@ BinaryBasicBlock *IndirectCallPromotion::fixCFG(
std::vector<MCSymbol*> SymTargets;
for (const auto &Target : Targets) {
const auto NumEntries = std::max(1UL, Target.JTIndex.size());
const auto NumEntries = std::max(1UL, Target.JTIndices.size());
for (size_t I = 0; I < NumEntries; ++I) {
SymTargets.push_back(Target.To.Sym);
}
@ -924,15 +1041,12 @@ IndirectCallPromotion::canPromoteCallsite(const BinaryBasicBlock *BB,
} else if (opts::IndirectCallPromotionCallsTopN != 0) {
TopN = opts::IndirectCallPromotionCallsTopN;
}
const auto TrialN = std::min(TopN, Targets.size());
const auto TrialN = TopN ? std::min(TopN, Targets.size()) : Targets.size();
if (opts::ICPTopCallsites > 0) {
auto &BC = BB->getFunction()->getBinaryContext();
if (BC.MIB->hasAnnotation(Inst, "DoICP")) {
computeStats(TrialN);
return TrialN;
}
return 0;
if (!BC.MIB->hasAnnotation(Inst, "DoICP"))
return 0;
}
// Pick the top N targets.
@ -974,35 +1088,28 @@ IndirectCallPromotion::canPromoteCallsite(const BinaryBasicBlock *BB,
// Count total number of calls for (at most) the top N targets.
// We may choose a smaller N (TrialN vs. N) if the frequency threshold
// is exceeded by fewer targets.
double Threshold = double(opts::IndirectCallPromotionThreshold);
for (size_t I = 0; I < TrialN && Threshold > 0; ++I, ++MaxTargets) {
if (N + (Targets[I].JTIndex.empty() ? 1 : Targets[I].JTIndex.size()) >
const unsigned TotalThreshold = IsJumpTable
? opts::ICPJTTotalPercentThreshold
: opts::ICPCallsTotalPercentThreshold;
const unsigned RemainingThreshold =
IsJumpTable ? opts::ICPJTRemainingPercentThreshold
: opts::ICPCallsRemainingPercentThreshold;
uint64_t NumRemainingCalls = NumCalls;
for (size_t I = 0; I < TrialN; ++I, ++MaxTargets) {
if (100 * Targets[I].Branches < NumCalls * TotalThreshold)
break;
if (100 * Targets[I].Branches < NumRemainingCalls * RemainingThreshold)
break;
if (N + (Targets[I].JTIndices.empty() ? 1 : Targets[I].JTIndices.size()) >
TrialN)
break;
TotalCallsTopN += Targets[I].Branches;
TotalMispredictsTopN += Targets[I].Mispreds;
Threshold -= (100.0 * Targets[I].Branches) / NumCalls;
N += Targets[I].JTIndex.empty() ? 1 : Targets[I].JTIndex.size();
NumRemainingCalls -= Targets[I].Branches;
N += Targets[I].JTIndices.empty() ? 1 : Targets[I].JTIndices.size();
}
computeStats(MaxTargets);
// Compute the frequency of the top N call targets. If this frequency
// is greater than the threshold, we should try ICP on this callsite.
const double TopNFrequency = (100.0 * TotalCallsTopN) / NumCalls;
if (TopNFrequency == 0 ||
TopNFrequency < opts::IndirectCallPromotionThreshold) {
if (opts::Verbosity >= 1) {
const auto InstIdx = &Inst - &(*BB->begin());
outs() << "BOLT-INFO: ICP failed in " << *BB->getFunction() << " @ "
<< InstIdx << " in " << BB->getName() << ", calls = "
<< NumCalls << ", top N frequency "
<< format("%.1f", TopNFrequency) << "% < "
<< opts::IndirectCallPromotionThreshold << "%\n";
}
return 0;
}
// Don't check misprediction frequency for jump tables -- we don't really
// care as long as we are saving loads from the jump table.
if (!IsJumpTable || opts::ICPJumpTablesByTarget) {
@ -1069,7 +1176,7 @@ IndirectCallPromotion::printCallsiteInfo(const BinaryBasicBlock *BB,
<< ", taken freq = " << format("%.1f", Frequency) << "%"
<< ", mis. freq = " << format("%.1f", MisFrequency) << "%";
bool First = true;
for (auto JTIndex : Targets[I].JTIndex) {
for (auto JTIndex : Targets[I].JTIndices) {
outs() << (First ? ", indices = " : ", ") << JTIndex;
First = false;
}
@ -1146,8 +1253,13 @@ void IndirectCallPromotion::runOnFunctions(BinaryContext &BC) {
// If icp-top-callsites is enabled, compute the total number of indirect
// calls and then optimize the hottest callsites that contribute to that
// total.
if (opts::ICPTopCallsites > 0) {
using IndirectCallsite = std::pair<uint64_t, MCInst *>;
SetVector<BinaryFunction *> Functions;
if (opts::ICPTopCallsites == 0) {
for (auto &KV : BFs) {
Functions.insert(&KV.second);
}
} else {
using IndirectCallsite = std::tuple<uint64_t, MCInst *, BinaryFunction *>;
std::vector<IndirectCallsite> IndirectCalls;
size_t TotalIndirectCalls = 0;
@ -1181,7 +1293,7 @@ void IndirectCallPromotion::runOnFunctions(BinaryContext &BC) {
NumCalls += BInfo.Branches;
}
IndirectCalls.push_back(std::make_pair(NumCalls, &Inst));
IndirectCalls.push_back(std::make_tuple(NumCalls, &Inst, &Function));
TotalIndirectCalls += NumCalls;
}
}
@ -1196,30 +1308,25 @@ void IndirectCallPromotion::runOnFunctions(BinaryContext &BC) {
const float TopPerc = opts::ICPTopCallsites / 100.0f;
int64_t MaxCalls = TotalIndirectCalls * TopPerc;
size_t Num = 0;
for (auto &IC : IndirectCalls) {
for (const auto &IC : IndirectCalls) {
if (MaxCalls <= 0)
break;
MaxCalls -= IC.first;
MaxCalls -= std::get<0>(IC);
BC.MIB->addAnnotation(*std::get<1>(IC), "DoICP", true);
Functions.insert(std::get<2>(IC));
++Num;
}
outs() << "BOLT-INFO: ICP Total indirect calls = " << TotalIndirectCalls
<< ", " << Num << " callsites cover " << opts::ICPTopCallsites
<< "% of all indirect calls\n";
// Mark sites to optimize with "DoICP" annotation.
for (size_t I = 0; I < Num; ++I) {
auto *Inst = IndirectCalls[I].second;
BC.MIB->addAnnotation(*Inst, "DoICP", true);
}
}
for (auto &BFIt : BFs) {
auto &Function = BFIt.second;
for (auto *FuncPtr : Functions) {
auto &Function = *FuncPtr;
if (!Function.isSimple() || !opts::shouldProcess(Function))
continue;
if (!Function.hasProfile())
if (!Function.isSimple() ||
!opts::shouldProcess(Function) ||
!Function.hasProfile())
continue;
const bool HasLayout = !Function.layout_empty();
@ -1307,7 +1414,10 @@ void IndirectCallPromotion::runOnFunctions(BinaryContext &BC) {
// this callsite.
size_t N = canPromoteCallsite(BB, Inst, Targets, NumCalls);
if (!N)
// If it is a jump table and it failed to meet our initial threshold,
// proceed to findCallTargetSymbols -- it may reevaluate N if
// memory profile is present
if (!N && !IsJumpTable)
continue;
if (opts::Verbosity >= 1) {
@ -1324,6 +1434,13 @@ void IndirectCallPromotion::runOnFunctions(BinaryContext &BC) {
Inst,
TargetFetchInst);
// findCallTargetSymbols may have changed N if mem profile is available
// for jump tables
if (!N)
continue;
DEBUG(printDecision(dbgs(), Targets, N));
// If we can't resolve any of the target symbols, punt on this callsite.
// TODO: can this ever happen?
if (SymTargets.size() < N) {

View File

@ -119,7 +119,7 @@ class IndirectCallPromotion : public BinaryFunctionPass {
uint64_t Mispreds{0};
uint64_t Branches{0};
// Indices in the jmp table (jt only)
std::vector<uint64_t> JTIndex;
std::vector<uint64_t> JTIndices;
bool isValid() const {
return From.isValid() && To.isValid();
}
@ -128,7 +128,7 @@ class IndirectCallPromotion : public BinaryFunctionPass {
uint64_t Mispreds, uint64_t Branches,
uint64_t JTIndex)
: From(From), To(To), Mispreds(Mispreds), Branches(Branches),
JTIndex(1, JTIndex) { }
JTIndices(1, JTIndex) { }
};
std::unordered_set<const BinaryFunction *> Modified;
@ -177,6 +177,10 @@ class IndirectCallPromotion : public BinaryFunctionPass {
// Total number of jump table sites that use hot indices.
uint64_t TotalIndexBasedJumps{0};
void printDecision(llvm::raw_ostream &OS,
std::vector<IndirectCallPromotion::Callsite> &Targets,
unsigned N) const;
std::vector<Callsite> getCallTargets(BinaryBasicBlock &BB,
const MCInst &Inst) const;
@ -201,7 +205,7 @@ class IndirectCallPromotion : public BinaryFunctionPass {
SymTargetsType findCallTargetSymbols(BinaryContext &BC,
std::vector<Callsite> &Targets,
const size_t N,
size_t &N,
BinaryFunction &Function,
BinaryBasicBlock *BB,
MCInst &Inst,