From 8d5854ef092753f20a4f531e386f503d5d111515 Mon Sep 17 00:00:00 2001 From: Maksim Panchenko Date: Wed, 31 Jul 2019 16:03:49 -0700 Subject: [PATCH] [BOLT] Add option to verify instruction encoder/decoder Summary: Add option `-check-encoding` to verify if the input to LLVM disassembler matches the output of the assembler. When set, the verification runs on every instruction in processed functions. I'm not enabling the option by default as it could be quite noisy on x86 where instruction encoding is ambiguous and can include redundant prefixes. (cherry picked from FBD16595415) --- bolt/src/BinaryContext.cpp | 8 +++----- bolt/src/BinaryContext.h | 14 ++++++++++++++ bolt/src/BinaryFunction.cpp | 34 +++++++++++++++++++++++++++++++--- 3 files changed, 48 insertions(+), 8 deletions(-) diff --git a/bolt/src/BinaryContext.cpp b/bolt/src/BinaryContext.cpp index ab32825554c7..329dabb52d96 100644 --- a/bolt/src/BinaryContext.cpp +++ b/bolt/src/BinaryContext.cpp @@ -685,11 +685,9 @@ bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) { << Twine::utohexstr(BF.getAddress() + BF.getSize()) << " starting at offset " << (Offset - BF.getSize()) << " in function " - << BF << '\n'; - for (auto I = BF.getSize(); I < BF.getMaxSize(); ++I) { - errs() << format("%.2x ", (*FunctionData)[I]); - } - errs() << '\n'; + << BF << '\n' + << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize()) + << '\n'; } return false; diff --git a/bolt/src/BinaryContext.h b/bolt/src/BinaryContext.h index ae5c9c34aff0..0d9388a01e04 100644 --- a/bolt/src/BinaryContext.h +++ b/bolt/src/BinaryContext.h @@ -20,6 +20,7 @@ #include "JumpTable.h" #include "MCPlusBuilder.h" #include "llvm/ADT/iterator.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Triple.h" #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" @@ -46,6 +47,7 @@ #include #include #include +#include #include #include @@ -1039,6 +1041,18 @@ public: } }; +template > +inline raw_ostream &operator<<(raw_ostream &OS, + const ArrayRef &ByteArray) { + const char *Sep = ""; + for (const auto Byte : ByteArray) { + OS << Sep << format("%.2x", Byte); + Sep = " "; + } + return OS; +} + } // namespace bolt } // namespace llvm diff --git a/bolt/src/BinaryFunction.cpp b/bolt/src/BinaryFunction.cpp index 47d1e4f1759b..fc4a87533d4f 100644 --- a/bolt/src/BinaryFunction.cpp +++ b/bolt/src/BinaryFunction.cpp @@ -81,11 +81,15 @@ AlignMacroOpFusion("align-macro-fusion", cl::cat(BoltRelocCategory)); cl::opt -PreserveBlocksAlignment("preserve-blocks-alignment", - cl::desc("try to preserve basic block alignment"), +CheckEncoding("check-encoding", + cl::desc("perform verification of LLVM instruction encoding/decoding. " + "Every instruction in the input is decoded and re-encoded. " + "If the resulting bytes do not match the input, a warning message " + "is printed."), cl::init(false), cl::ZeroOrMore, - cl::cat(BoltOptCategory)); + cl::Hidden, + cl::cat(BoltCategory)); static cl::opt DotToolTipCode("dot-tooltip-code", @@ -114,6 +118,13 @@ JumpTables("jump-tables", cl::ZeroOrMore, cl::cat(BoltOptCategory)); +cl::opt +PreserveBlocksAlignment("preserve-blocks-alignment", + cl::desc("try to preserve basic block alignment"), + cl::init(false), + cl::ZeroOrMore, + cl::cat(BoltOptCategory)); + cl::opt PrintDynoStats("dyno-stats", cl::desc("print execution info based on profile"), @@ -1016,6 +1027,23 @@ void BinaryFunction::disassemble(ArrayRef FunctionData) { break; } + // Check integrity of LLVM assembler/disassembler. + if (opts::CheckEncoding && !BC.MIB->isBranch(Instruction) && + !BC.MIB->isCall(Instruction) && !BC.MIB->isNoop(Instruction)) { + SmallString<256> Code; + SmallVector Fixups; + raw_svector_ostream VecOS(Code); + BC.MCE->encodeInstruction(Instruction, VecOS, Fixups, *BC.STI); + auto EncodedData = ArrayRef((uint8_t *)Code.data(), Code.size()); + if (FunctionData.slice(Offset, Size) != EncodedData) { + errs() << "BOLT-WARNING: mismatching LLVM encoding detected in " + << "function " << *this << ":\n"; + BC.printInstruction(errs(), Instruction, AbsoluteInstrAddr); + errs() << " input: " << FunctionData.slice(Offset, Size) + << "\n output: " << EncodedData << "\n\n"; + } + } + // Cannot process functions with AVX-512 instructions. if (MIB->hasEVEXEncoding(Instruction)) { if (opts::Verbosity >= 1) {