forked from OSchip/llvm-project
[BOLT] Decoder cache friendly alignment wrt Intel JCC Erratum
Summary: This diff ports reviews.llvm.org/D70157 to our LLVM tree. That patch lets the integrated assembler align X86 control-flow-changing instructions so as to reduce the performance impact of the ucode update on Intel processors that implement the JCC erratum mitigation. See the white paper "Mitigations for Jump Conditional Code Erratum" by Intel, published November 2019. To port this patch, I changed classifySecondInstInMacroFusion to analyze instruction opcodes directly instead of analyzing the CondCode operand (in more recent versions of LLVM, all conditional branches share the same opcode but carry a different condition-code operand). I also pulled Alignment.h into our tree as a dependency, along with the macro-op analysis helpers. Following the original patch, -x86-align-branch-boundary and -x86-align-branch are the two flags that control nop insertion to avoid disabling the decoder cache. In BOLT, I added the flag -x86-align-branch-boundary-hot-only, turned on by default, to request that the alignment be applied only to hot code. The reason is that such alignment is expensive to perform on large modules, but if we limit it to hot code, the relaxation pass runtime becomes tolerable. (cherry picked from FBD19828850)
This commit is contained in:
parent
d5b8fc8fbe
commit
c82e7fd1cc
1345
bolt/llvm.patch
1345
bolt/llvm.patch
File diff suppressed because it is too large
Load Diff
|
@ -81,6 +81,9 @@ using namespace llvm;
|
|||
using namespace object;
|
||||
using namespace bolt;
|
||||
|
||||
extern cl::opt<uint32_t> X86AlignBranchBoundary;
|
||||
extern cl::opt<bool> X86AlignBranchWithin32BBoundaries;
|
||||
|
||||
namespace opts {
|
||||
|
||||
extern bool HeatmapMode;
|
||||
|
@ -432,6 +435,12 @@ WriteBoltInfoSection("bolt-info",
|
|||
cl::Hidden,
|
||||
cl::cat(BoltOutputCategory));
|
||||
|
||||
// Boolean command-line option "x86-align-branch-boundary-hot-only",
// initialized to true: only apply branch boundary alignment in hot code.
static cl::opt<bool>
|
||||
X86AlignBranchBoundaryHotOnly("x86-align-branch-boundary-hot-only",
|
||||
cl::desc("only apply branch boundary alignment in hot code"),
|
||||
cl::init(true),
|
||||
cl::cat(BoltOptCategory));
|
||||
|
||||
bool isHotTextMover(const BinaryFunction &Function) {
|
||||
for (auto &SectionName : opts::HotTextMoveSections) {
|
||||
if (Function.getOriginSectionName() == SectionName)
|
||||
|
@ -1765,6 +1774,18 @@ void RewriteInstance::adjustCommandLineOptions() {
|
|||
opts::AlignMacroOpFusion = MFT_NONE;
|
||||
}
|
||||
|
||||
if ((X86AlignBranchWithin32BBoundaries || X86AlignBranchBoundary != 0) &&
|
||||
BC->isX86()) {
|
||||
if (!BC->HasRelocations) {
|
||||
errs() << "BOLT-ERROR: cannot apply mitigations for Intel JCC erratum in "
|
||||
"non-relocation mode\n";
|
||||
exit(1);
|
||||
}
|
||||
outs() << "BOLT-WARNING: using mitigation for Intel JCC erratum, layout "
|
||||
"may take several minutes\n";
|
||||
opts::AlignMacroOpFusion = MFT_NONE;
|
||||
}
|
||||
|
||||
if (opts::AlignMacroOpFusion != MFT_NONE &&
|
||||
!BC->HasRelocations) {
|
||||
outs() << "BOLT-INFO: disabling -align-macro-fusion in non-relocation "
|
||||
|
@ -3010,6 +3031,8 @@ void RewriteInstance::updateSDTMarkers() {
|
|||
|
||||
void RewriteInstance::emitFunctions(MCStreamer *Streamer) {
|
||||
auto emit = [&](const std::vector<BinaryFunction *> &Functions) {
|
||||
const auto HasProfile = BC->NumProfiledFuncs > 0;
|
||||
const uint32_t OriginalBranchBoundaryAlign = X86AlignBranchBoundary;
|
||||
for (auto *Function : Functions) {
|
||||
if (!BC->HasRelocations &&
|
||||
(!Function->isSimple() || !opts::shouldProcess(*Function)))
|
||||
|
@ -3020,10 +3043,19 @@ void RewriteInstance::emitFunctions(MCStreamer *Streamer) {
|
|||
<< Function->getFunctionNumber() << '\n');
|
||||
|
||||
bool Emitted{false};
|
||||
// Turn off Intel JCC Erratum mitigation for cold code if requested
|
||||
if (HasProfile && opts::X86AlignBranchBoundaryHotOnly &&
|
||||
!Function->hasValidProfile())
|
||||
X86AlignBranchBoundary = 0;
|
||||
|
||||
Emitted |= emitFunction(*Streamer, *Function, /*EmitColdPart=*/false);
|
||||
|
||||
if (Function->isSplit())
|
||||
if (Function->isSplit()) {
|
||||
if (opts::X86AlignBranchBoundaryHotOnly)
|
||||
X86AlignBranchBoundary = 0;
|
||||
Emitted |= emitFunction(*Streamer, *Function, /*EmitColdPart=*/true);
|
||||
}
|
||||
X86AlignBranchBoundary = OriginalBranchBoundaryAlign;
|
||||
|
||||
if (Emitted)
|
||||
Function->setEmitted(/*KeepCFG=*/opts::PrintCacheMetrics);
|
||||
|
|
Loading…
Reference in New Issue