forked from OSchip/llvm-project
[BOLT] Restore macro-fusion optimization
Summary: Restore the optimization with some modifications: * Only enabled in relocation mode. * Covers instructions other than TEST/CMP. * Prints missed macro-fusion opportunities for input. * By default enabled for all hot code. * Without profile enabled for all code. The new command-line option: -align-macro-fusion - fix instruction alignment for macro-fusion (x86 relocation mode) =none - do not insert alignment no-ops for macro-fusion =hot - only insert alignment no-ops on hot execution paths (default) =all - always align instructions to allow macro-fusion (cherry picked from FBD7644042)
This commit is contained in:
parent
c13cd9084d
commit
120d26727a
|
@ -347,6 +347,45 @@ bool BinaryBasicBlock::analyzeBranch(const MCSymbol *&TBB,
|
|||
UncondBranch);
|
||||
}
|
||||
|
||||
/// Return true if the instruction pair beginning at \p I could be fused into
/// a single macro-op. The actual check is target-specific and is delegated
/// to the MCPlusBuilder of this function's BinaryContext.
bool BinaryBasicBlock::isMacroOpFusionPair(const_iterator I) const {
  const auto &MIB = Function->getBinaryContext().MIB;
  const ArrayRef<MCInst> AllInsts = Instructions;
  // Hand the builder the tail of the instruction list starting at \p I.
  const auto StartIndex = I - begin();
  return MIB->isMacroOpFusionPair(AllInsts.slice(StartIndex));
}
|
||||
|
||||
/// Locate a candidate macro-fusion pair in this block: the instruction
/// preceding the terminating conditional branch, together with that branch.
/// Returns an iterator to the first instruction of the pair, or end() when
/// the block has no such pair (non-x86, wrong successor count, or the pair
/// fails the target-specific fusion check).
BinaryBasicBlock::const_iterator
BinaryBasicBlock::getMacroOpFusionPair() const {
  // Fusion candidates only exist on x86 and only in blocks ending with a
  // conditional branch, i.e. blocks with exactly two successors.
  if (!Function->getBinaryContext().isX86())
    return end();

  if (succ_size() != 2)
    return end();

  auto RevIt = getLastNonPseudo();
  assert(RevIt != rend() && "cannot have an empty block with 2 successors");

  auto &BC = Function->getBinaryContext();

  // An unconditional branch may trail the conditional one; step over it.
  if (BC.MIB->isUnconditionalBranch(*RevIt))
    ++RevIt;

  if (!BC.MIB->isConditionalBranch(*RevIt))
    return end();

  // Move to the instruction that precedes the conditional branch.
  ++RevIt;
  if (RevIt == rend())
    return end();

  // Convert the reverse iterator to a forward one pointing at the same
  // instruction, and run the target-specific pair check from there.
  auto CandIt = std::prev(RevIt.base());
  return isMacroOpFusionPair(CandIt) ? CandIt : end();
}
|
||||
|
||||
MCInst *BinaryBasicBlock::getTerminatorBefore(MCInst *Pos) {
|
||||
auto &BC = Function->getBinaryContext();
|
||||
auto Itr = rbegin();
|
||||
|
|
|
@ -462,6 +462,9 @@ public:
|
|||
/// Return reverse iterator to the last non-pseudo instruction or rend()
|
||||
/// if no such instruction was found.
|
||||
reverse_iterator getLastNonPseudo();
|
||||
// Const overload: delegate to the non-const version. The const_cast is safe
// because the returned iterator is itself const and the callee only scans.
const_reverse_iterator getLastNonPseudo() const {
  return const_cast<BinaryBasicBlock *>(this)->getLastNonPseudo();
}
|
||||
|
||||
/// Return a pointer to the last non-pseudo instruction in this basic
|
||||
/// block. Returns nullptr if none exists.
|
||||
|
@ -754,6 +757,15 @@ public:
|
|||
MCInst *&CondBranch,
|
||||
MCInst *&UncondBranch);
|
||||
|
||||
/// Return true if iterator \p I is pointing to the first instruction in
|
||||
/// a pair that could be macro-fused.
|
||||
bool isMacroOpFusionPair(const_iterator I) const;
|
||||
|
||||
/// If the basic block has a pair of instructions suitable for macro-fusion,
|
||||
/// return iterator to the first instruction of the pair.
|
||||
/// Otherwise return end().
|
||||
const_iterator getMacroOpFusionPair() const;
|
||||
|
||||
/// Printer required for printing dominator trees.
|
||||
void printAsOperand(raw_ostream &OS, bool PrintType = true) {
|
||||
if (PrintType) {
|
||||
|
|
|
@ -245,6 +245,10 @@ public:
|
|||
/// Total hotness score according to profiling data for this binary.
|
||||
uint64_t TotalScore{0};
|
||||
|
||||
/// Binary-wide stats for macro-fusion.
|
||||
uint64_t MissedMacroFusionPairs{0};
|
||||
uint64_t MissedMacroFusionExecCount{0};
|
||||
|
||||
/// Track next available address for new allocatable sections. RewriteInstance
|
||||
/// sets this prior to running BOLT passes, so layout passes are aware of the
|
||||
/// final addresses functions will have.
|
||||
|
@ -304,6 +308,11 @@ public:
|
|||
return TheTriple->getArch() == llvm::Triple::aarch64;
|
||||
}
|
||||
|
||||
bool isX86() const {
|
||||
return TheTriple->getArch() == llvm::Triple::x86 ||
|
||||
TheTriple->getArch() == llvm::Triple::x86_64;
|
||||
}
|
||||
|
||||
/// Iterate over all BinaryData.
|
||||
iterator_range<binary_data_const_iterator> getBinaryData() const {
|
||||
return make_range(BinaryDataMap.begin(), BinaryDataMap.end());
|
||||
|
|
|
@ -61,6 +61,19 @@ AlignBlocks("align-blocks",
|
|||
cl::ZeroOrMore,
|
||||
cl::cat(BoltOptCategory));
|
||||
|
||||
// -align-macro-fusion: selects when BOLT inserts alignment no-ops so a
// fusible instruction pair is not split at emission time (x86 relocation
// mode only; see MacroFusionType for the three levels).
cl::opt<MacroFusionType>
AlignMacroOpFusion("align-macro-fusion",
  cl::desc("fix instruction alignment for macro-fusion (x86 relocation mode)"),
  cl::init(MFT_HOT),
  cl::values(clEnumValN(MFT_NONE, "none",
               "do not insert alignment no-ops for macro-fusion"),
             clEnumValN(MFT_HOT, "hot",
               "only insert alignment no-ops on hot execution paths (default)"),
             clEnumValN(MFT_ALL, "all",
               "always align instructions to allow macro-fusion")),
  cl::ZeroOrMore,
  cl::cat(BoltRelocCategory));
|
||||
|
||||
static cl::opt<bool>
|
||||
DotToolTipCode("dot-tooltip-code",
|
||||
cl::desc("add basic block instructions as tool tips on nodes"),
|
||||
|
@ -1768,6 +1781,8 @@ void BinaryFunction::postProcessCFG() {
|
|||
// Eliminate inconsistencies between branch instructions and CFG.
|
||||
postProcessBranches();
|
||||
}
|
||||
|
||||
calculateMacroOpFusionStats();
|
||||
}
|
||||
|
||||
// The final cleanup of intermediate structures.
|
||||
|
@ -1779,8 +1794,32 @@ void BinaryFunction::postProcessCFG() {
|
|||
for (auto &Inst : *BB)
|
||||
BC.MIB->removeAnnotation(Inst, "Offset");
|
||||
|
||||
assert((!isSimple() || validateCFG())
|
||||
&& "Invalid CFG detected after post-processing CFG");
|
||||
assert((!isSimple() || validateCFG()) &&
|
||||
"invalid CFG detected after post-processing");
|
||||
}
|
||||
|
||||
void BinaryFunction::calculateMacroOpFusionStats() {
|
||||
if (!getBinaryContext().isX86())
|
||||
return;
|
||||
for (auto *BB : layout()) {
|
||||
auto II = BB->getMacroOpFusionPair();
|
||||
if (II == BB->end())
|
||||
continue;
|
||||
|
||||
// Check offset of the second instruction.
|
||||
// FIXME: arch-specific.
|
||||
const auto Offset =
|
||||
BC.MIB->getAnnotationWithDefault<uint64_t>(*std::next(II), "Offset", 0);
|
||||
if (!Offset || (getAddress() + Offset) % 64)
|
||||
continue;
|
||||
|
||||
DEBUG(dbgs() << "\nmissed macro-op fusion at address 0x"
|
||||
<< Twine::utohexstr(getAddress() + Offset) << " in function "
|
||||
<< *this << "; executed " << BB->getKnownExecutionCount()
|
||||
<< " times.\n");
|
||||
++BC.MissedMacroFusionPairs;
|
||||
BC.MissedMacroFusionExecCount += BB->getKnownExecutionCount();
|
||||
}
|
||||
}
|
||||
|
||||
void BinaryFunction::removeTagsFromProfile() {
|
||||
|
@ -2157,9 +2196,24 @@ void BinaryFunction::emitBody(MCStreamer &Streamer, bool EmitColdPart) {
|
|||
Streamer.EmitCodeAlignment(BB->getAlignment());
|
||||
Streamer.EmitLabel(BB->getLabel());
|
||||
|
||||
// Check if special alignment for macro-fusion is needed.
|
||||
bool MayNeedMacroFusionAlignment =
|
||||
(opts::AlignMacroOpFusion == MFT_ALL) ||
|
||||
(opts::AlignMacroOpFusion == MFT_HOT &&
|
||||
BB->getKnownExecutionCount());
|
||||
BinaryBasicBlock::const_iterator MacroFusionPair;
|
||||
if (MayNeedMacroFusionAlignment) {
|
||||
MacroFusionPair = BB->getMacroOpFusionPair();
|
||||
if (MacroFusionPair == BB->end())
|
||||
MayNeedMacroFusionAlignment = false;
|
||||
}
|
||||
|
||||
SMLoc LastLocSeen;
|
||||
// Remember if the last instruction emitted was a prefix.
|
||||
bool LastIsPrefix = false;
|
||||
for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
|
||||
auto &Instr = *I;
|
||||
|
||||
// Handle pseudo instructions.
|
||||
if (BC.MIB->isEHLabel(Instr)) {
|
||||
const auto *Label = BC.MIB->getTargetSymbol(Instr);
|
||||
|
@ -2172,11 +2226,23 @@ void BinaryFunction::emitBody(MCStreamer &Streamer, bool EmitColdPart) {
|
|||
Streamer.EmitCFIInstruction(*getCFIFor(Instr));
|
||||
continue;
|
||||
}
|
||||
|
||||
// Handle macro-fusion alignment. If we emitted a prefix as
|
||||
// the last instruction, we should've already emitted the associated
|
||||
// alignment hint, so don't emit it twice.
|
||||
if (MayNeedMacroFusionAlignment && !LastIsPrefix && I == MacroFusionPair){
|
||||
// This assumes the second instruction in the macro-op pair will get
|
||||
// assigned to its own MCRelaxableFragment. Since all JCC instructions
|
||||
// are relaxable, we should be safe.
|
||||
Streamer.EmitNeverAlignCodeAtEnd(/*Alignment to avoid=*/64);
|
||||
}
|
||||
|
||||
if (opts::UpdateDebugSections && UnitLineTable.first) {
|
||||
LastLocSeen = emitLineInfo(Instr.getLoc(), LastLocSeen);
|
||||
}
|
||||
|
||||
Streamer.EmitInstruction(Instr, *BC.STI);
|
||||
LastIsPrefix = BC.MIB->isPrefix(Instr);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -147,6 +147,13 @@ inline raw_ostream &operator<<(raw_ostream &OS, const DynoStats &Stats) {
|
|||
|
||||
DynoStats operator+(const DynoStats &A, const DynoStats &B);
|
||||
|
||||
/// Types of macro-fusion alignment corrections.
|
||||
enum MacroFusionType {
  MFT_NONE, // no alignment no-ops are inserted for macro-fusion
  MFT_HOT,  // insert alignment no-ops only on hot execution paths (default)
  MFT_ALL   // always align instructions to enable macro-fusion
};
|
||||
|
||||
enum IndirectCallPromotionType : char {
|
||||
ICP_NONE, /// Don't perform ICP.
|
||||
ICP_CALLS, /// Perform ICP on indirect calls.
|
||||
|
@ -857,6 +864,10 @@ public:
|
|||
/// them.
|
||||
void calculateLoopInfo();
|
||||
|
||||
/// Calculate missed macro-fusion opportunities and update BinaryContext
|
||||
/// stats.
|
||||
void calculateMacroOpFusionStats();
|
||||
|
||||
/// Returns if loop detection has been run for this function.
|
||||
bool hasLoopInfo() const {
|
||||
return BLI != nullptr;
|
||||
|
|
|
@ -692,6 +692,13 @@ public:
|
|||
return false;
|
||||
}
|
||||
|
||||
/// Return true if a pair of instructions represented by \p Insts
|
||||
/// could be fused into a single uop.
|
||||
virtual bool isMacroOpFusionPair(ArrayRef<MCInst> Insts) const {
  // Base-class placeholder: only targets that model macro-op fusion
  // (e.g. x86) override this with a real implementation.
  llvm_unreachable("not implemented");
  return false;
}
|
||||
|
||||
/// Given an instruction with (compound) memory operand, evaluate and return
|
||||
/// the corresponding values. Note that the operand could be in any position,
|
||||
/// but there is an assumption there's only one compound memory operand.
|
||||
|
@ -1354,7 +1361,7 @@ public:
|
|||
return getOrCreateAnnotationAs<ValueType>(Inst, Index);
|
||||
}
|
||||
|
||||
/// Get an annotation as a specific value. Assumes that the annotation exists.
/// Use hasAnnotation() if the annotation may not exist.
|
||||
template <typename ValueType>
|
||||
const ValueType &getAnnotationAs(const MCInst &Inst, unsigned Index) const {
|
||||
|
@ -1364,7 +1371,7 @@ public:
|
|||
(*Value)->getValue();
|
||||
}
|
||||
|
||||
/// Get an annotation as a specific value. Assumes that the annotation exists.
/// Use hasAnnotation() if the annotation may not exist.
|
||||
template <typename ValueType>
|
||||
const ValueType &getAnnotationAs(const MCInst &Inst, StringRef Name) const {
|
||||
|
|
|
@ -51,6 +51,7 @@ namespace opts {
|
|||
extern cl::OptionCategory BoltCategory;
|
||||
extern cl::OptionCategory BoltOptCategory;
|
||||
|
||||
extern cl::opt<bolt::MacroFusionType> AlignMacroOpFusion;
|
||||
extern cl::opt<unsigned> Verbosity;
|
||||
extern cl::opt<bolt::BinaryFunction::SplittingType> SplitFunctions;
|
||||
extern bool shouldProcess(const bolt::BinaryFunction &Function);
|
||||
|
@ -1572,6 +1573,25 @@ PrintProgramStats::runOnFunctions(BinaryContext &BC,
|
|||
errs() << " (use -v=1 to see the list).\n";
|
||||
}
|
||||
}
|
||||
|
||||
// Print information on missed macro-fusion opportunities seen on input.
|
||||
if (BC.MissedMacroFusionPairs) {
|
||||
outs() << "BOLT-INFO: the input contains "
|
||||
<< BC.MissedMacroFusionPairs << " (dynamic count : "
|
||||
<< BC.MissedMacroFusionExecCount
|
||||
<< ") missed opportunities for macro-fusion optimization";
|
||||
switch (opts::AlignMacroOpFusion) {
|
||||
case MFT_NONE:
|
||||
outs() << ". Use -align-macro-fusion to fix.\n";
|
||||
break;
|
||||
case MFT_HOT:
|
||||
outs() << ". Will fix instances on a hot path.\n";
|
||||
break;
|
||||
case MFT_ALL:
|
||||
outs() << " that are going to be fixed\n";
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void InstructionLowering::runOnFunctions(
|
||||
|
|
|
@ -81,6 +81,7 @@ extern cl::OptionCategory BoltOptCategory;
|
|||
extern cl::OptionCategory BoltOutputCategory;
|
||||
extern cl::OptionCategory AggregatorCategory;
|
||||
|
||||
extern cl::opt<MacroFusionType> AlignMacroOpFusion;
|
||||
extern cl::opt<JumpTableSupportLevel> JumpTables;
|
||||
|
||||
static cl::opt<bool>
|
||||
|
@ -115,12 +116,6 @@ BoltProfile("b",
|
|||
cl::desc("<bolt profile>"),
|
||||
cl::cat(BoltCategory));
|
||||
|
||||
cl::opt<bool>
|
||||
BoostMacroops("boost-macroops",
|
||||
cl::desc("try to boost macro-op fusions by avoiding the cache-line boundary"),
|
||||
cl::ZeroOrMore,
|
||||
cl::cat(BoltOptCategory));
|
||||
|
||||
static cl::list<std::string>
|
||||
BreakFunctionNames("break-funcs",
|
||||
cl::CommaSeparated,
|
||||
|
@ -961,21 +956,10 @@ void RewriteInstance::run() {
|
|||
return;
|
||||
}
|
||||
|
||||
// Flip unsupported flags in AArch64 mode
|
||||
if (BC->isAArch64()) {
|
||||
if (opts::BoostMacroops) {
|
||||
opts::BoostMacroops = false;
|
||||
outs() << "BOLT-INFO: disabling -boost-macroops for AArch64\n";
|
||||
}
|
||||
if (opts::RelocationMode != cl::BOU_TRUE) {
|
||||
errs() << "BOLT-WARNING: non-relocation mode for AArch64 is not fully "
|
||||
"supported\n";
|
||||
}
|
||||
}
|
||||
|
||||
auto executeRewritePass = [&](const std::set<uint64_t> &NonSimpleFunctions) {
|
||||
discoverStorage();
|
||||
readSpecialSections();
|
||||
adjustCommandLineOptions();
|
||||
discoverFileObjects();
|
||||
readDebugInfo();
|
||||
disassembleFunctions();
|
||||
|
@ -1772,6 +1756,32 @@ void RewriteInstance::readSpecialSections() {
|
|||
CFIRdWrt.reset(new CFIReaderWriter(*EHFrame));
|
||||
}
|
||||
|
||||
void RewriteInstance::adjustCommandLineOptions() {
|
||||
if (BC->isAArch64() && opts::RelocationMode != cl::BOU_TRUE) {
|
||||
errs() << "BOLT-WARNING: non-relocation mode for AArch64 is not fully "
|
||||
"supported\n";
|
||||
}
|
||||
|
||||
if (opts::AlignMacroOpFusion != MFT_NONE && !BC->isX86()) {
|
||||
outs() << "BOLT-INFO: disabling -align-macro-fusion on non-x86 platform\n";
|
||||
opts::AlignMacroOpFusion = MFT_NONE;
|
||||
}
|
||||
if (opts::AlignMacroOpFusion != MFT_NONE &&
|
||||
!BC->HasRelocations) {
|
||||
outs() << "BOLT-INFO: disabling -align-macro-fusion in non-relocation "
|
||||
"mode\n";
|
||||
opts::AlignMacroOpFusion = MFT_NONE;
|
||||
}
|
||||
if (BC->isX86() && BC->HasRelocations &&
|
||||
opts::AlignMacroOpFusion == MFT_HOT &&
|
||||
!DA.started() && BC->DR.getAllFuncsData().empty() &&
|
||||
opts::BoltProfile.empty()) {
|
||||
outs() << "BOLT-INFO: enabling -align-macro-fusion=all since no profile "
|
||||
"was specified\n";
|
||||
opts::AlignMacroOpFusion = MFT_ALL;
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
template <typename ELFT>
|
||||
int64_t getRelocationAddend(const ELFObjectFile<ELFT> *Obj,
|
||||
|
|
|
@ -137,6 +137,9 @@ public:
|
|||
/// for exception and stack unwinding information.
|
||||
void readSpecialSections();
|
||||
|
||||
/// Adjust supplied command-line options based on input data.
|
||||
void adjustCommandLineOptions();
|
||||
|
||||
/// Read relocations from a given section.
|
||||
void readRelocations(const object::SectionRef &Section);
|
||||
|
||||
|
|
|
@ -50,6 +50,10 @@ public:
|
|||
return false;
|
||||
}
|
||||
|
||||
bool isMacroOpFusionPair(ArrayRef<MCInst> Insts) const override {
  // This target does not model macro-op fusion pairs.
  return false;
}
|
||||
|
||||
bool shortenInstruction(MCInst &) const override {
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -215,6 +215,255 @@ unsigned getInvertedBranchOpcode(unsigned Opcode) {
|
|||
}
|
||||
}
|
||||
|
||||
// Return true if \p Opcode is any encoding of the x86 ADD instruction
// (all operand widths and reg/mem/imm operand combinations).
bool isADD(unsigned Opcode) {
  switch (Opcode) {
  default:
    return false;
  case X86::ADD16i16:
  case X86::ADD16mi:
  case X86::ADD16mi8:
  case X86::ADD16mr:
  case X86::ADD16ri:
  case X86::ADD16ri8:
  case X86::ADD16ri8_DB:
  case X86::ADD16ri_DB:
  case X86::ADD16rm:
  case X86::ADD16rr:
  case X86::ADD16rr_DB:
  case X86::ADD16rr_REV:
  case X86::ADD32i32:
  case X86::ADD32mi:
  case X86::ADD32mi8:
  case X86::ADD32mr:
  case X86::ADD32ri:
  case X86::ADD32ri8:
  case X86::ADD32ri8_DB:
  case X86::ADD32ri_DB:
  case X86::ADD32rm:
  case X86::ADD32rr:
  case X86::ADD32rr_DB:
  case X86::ADD32rr_REV:
  case X86::ADD64i32:
  case X86::ADD64mi32:
  case X86::ADD64mi8:
  case X86::ADD64mr:
  case X86::ADD64ri32:
  case X86::ADD64ri32_DB:
  case X86::ADD64ri8:
  case X86::ADD64ri8_DB:
  case X86::ADD64rm:
  case X86::ADD64rr:
  case X86::ADD64rr_DB:
  case X86::ADD64rr_REV:
  case X86::ADD8i8:
  case X86::ADD8mi:
  case X86::ADD8mi8:
  case X86::ADD8mr:
  case X86::ADD8ri:
  case X86::ADD8ri8:
  case X86::ADD8rm:
  case X86::ADD8rr:
  case X86::ADD8rr_REV:
    return true;
  }
}
|
||||
|
||||
// Return true if \p Opcode is any encoding of the x86 AND instruction.
bool isAND(unsigned Opcode) {
  switch (Opcode) {
  default:
    return false;
  case X86::AND16i16:
  case X86::AND16mi:
  case X86::AND16mi8:
  case X86::AND16mr:
  case X86::AND16ri:
  case X86::AND16ri8:
  case X86::AND16rm:
  case X86::AND16rr:
  case X86::AND16rr_REV:
  case X86::AND32i32:
  case X86::AND32mi:
  case X86::AND32mi8:
  case X86::AND32mr:
  case X86::AND32ri:
  case X86::AND32ri8:
  case X86::AND32rm:
  case X86::AND32rr:
  case X86::AND32rr_REV:
  case X86::AND64i32:
  case X86::AND64mi32:
  case X86::AND64mi8:
  case X86::AND64mr:
  case X86::AND64ri32:
  case X86::AND64ri8:
  case X86::AND64rm:
  case X86::AND64rr:
  case X86::AND64rr_REV:
  case X86::AND8i8:
  case X86::AND8mi:
  case X86::AND8mi8:
  case X86::AND8mr:
  case X86::AND8ri:
  case X86::AND8ri8:
  case X86::AND8rm:
  case X86::AND8rr:
  case X86::AND8rr_REV:
    return true;
  }
}
|
||||
|
||||
// Return true if \p Opcode is any encoding of the x86 CMP instruction.
bool isCMP(unsigned Opcode) {
  switch (Opcode) {
  default:
    return false;
  case X86::CMP16i16:
  case X86::CMP16mi:
  case X86::CMP16mi8:
  case X86::CMP16mr:
  case X86::CMP16ri:
  case X86::CMP16ri8:
  case X86::CMP16rm:
  case X86::CMP16rr:
  case X86::CMP16rr_REV:
  case X86::CMP32i32:
  case X86::CMP32mi:
  case X86::CMP32mi8:
  case X86::CMP32mr:
  case X86::CMP32ri:
  case X86::CMP32ri8:
  case X86::CMP32rm:
  case X86::CMP32rr:
  case X86::CMP32rr_REV:
  case X86::CMP64i32:
  case X86::CMP64mi32:
  case X86::CMP64mi8:
  case X86::CMP64mr:
  case X86::CMP64ri32:
  case X86::CMP64ri8:
  case X86::CMP64rm:
  case X86::CMP64rr:
  case X86::CMP64rr_REV:
  case X86::CMP8i8:
  case X86::CMP8mi:
  case X86::CMP8mi8:
  case X86::CMP8mr:
  case X86::CMP8ri:
  case X86::CMP8ri8:
  case X86::CMP8rm:
  case X86::CMP8rr:
  case X86::CMP8rr_REV:
    return true;
  }
}
|
||||
|
||||
// Return true if \p Opcode is any encoding of the x86 DEC instruction.
bool isDEC(unsigned Opcode) {
  switch (Opcode) {
  default:
    return false;
  case X86::DEC16m:
  case X86::DEC16r:
  case X86::DEC16r_alt:
  case X86::DEC32m:
  case X86::DEC32r:
  case X86::DEC32r_alt:
  case X86::DEC64r:
  case X86::DEC64m:
  case X86::DEC8m:
  case X86::DEC8r:
    return true;
  }
}
|
||||
|
||||
// Return true if \p Opcode is any encoding of the x86 INC instruction.
bool isINC(unsigned Opcode) {
  switch (Opcode) {
  default:
    return false;
  case X86::INC16m:
  case X86::INC16r:
  case X86::INC16r_alt:
  case X86::INC32m:
  case X86::INC32r:
  case X86::INC32r_alt:
  case X86::INC64r:
  case X86::INC64m:
  case X86::INC8m:
  case X86::INC8r:
    return true;
  }
}
|
||||
|
||||
// Return true if \p Opcode is any encoding of the x86 SUB instruction.
bool isSUB(unsigned Opcode) {
  switch (Opcode) {
  default:
    return false;
  case X86::SUB16i16:
  case X86::SUB16mi:
  case X86::SUB16mi8:
  case X86::SUB16mr:
  case X86::SUB16ri:
  case X86::SUB16ri8:
  case X86::SUB16rm:
  case X86::SUB16rr:
  case X86::SUB16rr_REV:
  case X86::SUB32i32:
  case X86::SUB32mi:
  case X86::SUB32mi8:
  case X86::SUB32mr:
  case X86::SUB32ri:
  case X86::SUB32ri8:
  case X86::SUB32rm:
  case X86::SUB32rr:
  case X86::SUB32rr_REV:
  case X86::SUB64i32:
  case X86::SUB64mi32:
  case X86::SUB64mi8:
  case X86::SUB64mr:
  case X86::SUB64ri32:
  case X86::SUB64ri8:
  case X86::SUB64rm:
  case X86::SUB64rr:
  case X86::SUB64rr_REV:
  case X86::SUB8i8:
  case X86::SUB8mi:
  case X86::SUB8mi8:
  case X86::SUB8mr:
  case X86::SUB8ri:
  case X86::SUB8ri8:
  case X86::SUB8rm:
  case X86::SUB8rr:
  case X86::SUB8rr_REV:
    return true;
  }
}
|
||||
|
||||
// Return true if \p Opcode is any encoding of the x86 TEST instruction.
bool isTEST(unsigned Opcode) {
  switch (Opcode) {
  default:
    return false;
  case X86::TEST16i16:
  case X86::TEST16mi:
  case X86::TEST16mr:
  case X86::TEST16ri:
  case X86::TEST16rr:
  case X86::TEST32i32:
  case X86::TEST32mi:
  case X86::TEST32mr:
  case X86::TEST32ri:
  case X86::TEST32rr:
  case X86::TEST64i32:
  case X86::TEST64mi32:
  case X86::TEST64mr:
  case X86::TEST64ri32:
  case X86::TEST64rr:
  case X86::TEST8i8:
  case X86::TEST8mi:
  case X86::TEST8mr:
  case X86::TEST8ri:
  case X86::TEST8rr:
    return true;
  }
}
|
||||
|
||||
class X86MCPlusBuilder : public MCPlusBuilder {
|
||||
public:
|
||||
X86MCPlusBuilder(const MCInstrAnalysis *Analysis, const MCInstrInfo *Info,
|
||||
|
@ -351,9 +600,7 @@ public:
|
|||
}
|
||||
|
||||
bool isSUB(const MCInst &Inst) const override {
|
||||
return Inst.getOpcode() == X86::SUB64rr ||
|
||||
Inst.getOpcode() == X86::SUB64ri32 ||
|
||||
Inst.getOpcode() == X86::SUB64ri8;
|
||||
return ::isSUB(Inst.getOpcode());
|
||||
}
|
||||
|
||||
bool isADDri(const MCInst &Inst) const {
|
||||
|
@ -676,6 +923,81 @@ public:
|
|||
return (Desc.TSFlags & X86II::EncodingMask) == X86II::EVEX;
|
||||
}
|
||||
|
||||
/// Return true if the first two non-prefix instructions in \p Insts form a
/// pair that the CPU can macro-fuse: an arithmetic/compare instruction
/// immediately followed by a conditional branch, subject to the operand and
/// condition-code restrictions below.
bool isMacroOpFusionPair(ArrayRef<MCInst> Insts) const override {
  // FIXME: the macro-op fusion is triggered under different conditions
  // on different cores. This implementation is for sandy-bridge+.

  // Skip any prefix instructions preceding the first real instruction.
  auto I = Insts.begin();
  while (I != Insts.end() && isPrefix(*I))
    ++I;
  if (I == Insts.end())
    return false;

  const auto &FirstInst = *I;
  ++I;
  // Skip prefixes again to reach the second real instruction.
  while (I != Insts.end() && isPrefix(*I))
    ++I;
  if (I == Insts.end())
    return false;
  const auto &SecondInst = *I;

  // The second instruction of a fusible pair must be a conditional branch.
  if (!isConditionalBranch(SecondInst))
    return false;
  // J?CXZ and LOOP cannot be fused
  if (SecondInst.getOpcode() == X86::LOOP ||
      SecondInst.getOpcode() == X86::LOOPE ||
      SecondInst.getOpcode() == X86::LOOPNE ||
      SecondInst.getOpcode() == X86::JECXZ ||
      SecondInst.getOpcode() == X86::JRCXZ)
    return false;

  // Cannot fuse if first instruction operands are MEM-IMM.
  auto const &Desc = Info->get(FirstInst.getOpcode());
  auto MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags);
  if (MemOpNo != -1 && X86II::hasImm(Desc.TSFlags))
    return false;

  // Cannot fuse if the first instruction uses RIP-relative memory.
  // FIXME: verify that this is true.
  if (hasPCRelOperand(FirstInst))
    return false;

  // Check instructions against table 3-1 in Intel's Optimization Guide.
  // Group 1: TEST/AND fuse with every condition code; group 2: CMP/ADD/SUB
  // fuse with most; group 3: INC/DEC fuse with the fewest.
  unsigned FirstInstGroup = 0;
  if (isTEST(FirstInst.getOpcode()) || isAND(FirstInst.getOpcode())) {
    FirstInstGroup = 1;
  } else if (isCMP(FirstInst.getOpcode()) || isADD(FirstInst.getOpcode()) ||
             ::isSUB(FirstInst.getOpcode())) {
    FirstInstGroup = 2;
  } else if (isINC(FirstInst.getOpcode()) || isDEC(FirstInst.getOpcode())) {
    FirstInstGroup = 3;
  }
  if (FirstInstGroup == 0)
    return false;

  // Normalize the branch to its canonical short form so one switch covers
  // all encodings of a given condition code.
  const auto CondCode =
    getShortBranchOpcode(getCanonicalBranchOpcode(SecondInst.getOpcode()));
  switch (CondCode) {
  default:
    llvm_unreachable("unexpected conditional code");
    return false;
  case X86::JE_1:
  case X86::JL_1:
  case X86::JG_1:
    // ZF/SF-based conditions fuse with all three groups.
    return true;
  case X86::JO_1:
  case X86::JP_1:
  case X86::JS_1:
    // OF/PF/SF-only conditions fuse only with TEST/AND (group 1).
    if (FirstInstGroup == 1)
      return true;
    return false;
  case X86::JA_1:
  case X86::JB_1:
    // CF-based conditions fuse with everything except INC/DEC (group 3),
    // since INC/DEC do not update CF.
    if (FirstInstGroup != 3)
      return true;
    return false;
  }
}
|
||||
|
||||
bool evaluateX86MemoryOperand(const MCInst &Inst,
|
||||
unsigned *BaseRegNum,
|
||||
int64_t *ScaleImm,
|
||||
|
|
Loading…
Reference in New Issue